In [7]:
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', 50)

import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
import seaborn as sns
sns.set()

In [8]:
import plotly.express as px
import json
from urllib.request import urlopen
import matplotlib.image as mpimg

TESTING = False
DATA_DIR  = "../data/"
# This is the output file from ../prepare_311_data/prepare_311_data.sh
FILE_NAME = "pairwise_spatial_dtw.json"
with open(DATA_DIR+FILE_NAME) as f:
    distances = json.load(f)

In [8]:
geojson_link = "https://data.cityofnewyork.us/api/geospatial/pri4-ifjk?method=export&format=GeoJSON"

import json
from urllib.request import urlopen

with urlopen(geojson_link) as response:
    geojson = json.load(response)

In [14]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

filenames = [#"pairwise_spatial_dtw.json",
             #pairwise_spatial_ed.json",
             "pairwise_spatial_dtw_noise.json",
             "pairwise_spatial_ed_noise.json"
             ]  # TODO: Other files

def create_year_choropleths(fig, filename, row):
    with open(DATA_DIR+filename) as f:
        distances = json.load(f)
        
    #fig = make_subplots(1, 4)
    for col, (year, year_dist) in enumerate(distances.items(), 1):
        year_dist = pd.DataFrame({"zcta": year_dist.keys(),
                                  "distance": year_dist.values()})
        plotly_fig = px.choropleth_mapbox(
            year_dist, geojson=geojson, locations='zcta', 
            color="distance",
            color_continuous_scale="Viridis", 
            featureidkey="properties.modzcta", 
            #labels={"percent_diff": "% difference 2019 to 2020"},
            mapbox_style="carto-positron",
            zoom=9, center={"lat": 40.7, "lon": -74},
            opacity=0.7, title=f"{filename} - {year}",
            hover_name="zcta"
        )
        plotly_fig.update_layout(margin={"r":0,"t":30,"l":0,"b":0})
        img_name = f"1_5_{filename}_{year}.png"
        plotly_fig.write_image(img_name)
        continue
        # None of the below worked :)
        loaded_img = mpimg.imread(img_name)
        # We use go.Image because subplots require traces, whereas px functions return a figure
        fig.add_trace(go.Image(z=loaded_img), 1, col)

        # Create the matplotlib figure
        ax = fig.add_subplot(len(filenames), 4, row*4 + col)
        ax.imshow(loaded_img)
        ax.grid(None)
        ax.set_title(year)
    #return fig

In [15]:
base_size = 50
#fig, ax = subplots(figsize=(len(filename), 4*base_size))
fig = plt.figure(figsize=(len(filenames)*base_size, 4*base_size))

for row, file_name in enumerate(filenames, 1):
    create_year_choropleths(fig, file_name, 0)
#fig.update_layout(height=400)
#fig.show()

<Figure size 7200x14400 with 0 Axes>

In [None]:
for col, (year, year_dist) in enumerate(distances.items(), 1):
    img_name = f"1_5_{filename}_{year}.png"
    loaded_img = mpimg.imread(img_name)
    fig = px.imshow(loaded_img)#, facet_col=0, binary_string=True,
    #             labels={'facet_col':'sigma'})
fig.show()

In [2]:

CENSUS_NAME = "census_data.csv"
complaints = pd.read_csv(DATA_DIR+FILE_NAME, 
                         parse_dates=["date"], 
                         cache_dates=True,
                         infer_datetime_format=True
)
census = pd.read_csv(
    DATA_DIR+CENSUS_NAME, 
    names=["geo", "zcta", "median_earning", "full_time_median", "full_time_mean"],
    usecols=[1, 2, 3, 4],
)
num_cols = "median_earning", "full_time_median", "full_time_mean"
for col in num_cols:
    census[col] = pd.to_numeric(census[col], errors="coerce")

In [6]:
start_month = "03"
end_month = "09"
noise_complaints_covid = complaints.query(f"""
    ("2019-{start_month}-01" <= date and date <= "2019-{end_month}-01") \
    or \
    ("2020-{start_month}-01" <= date and date <= "2020-{end_month}-01")
""")
noise_complaints_covid = noise_complaints_covid.loc[
    noise_complaints_covid["type"].str.contains("noise", case=False)
]
amount_zips = noise_complaints_covid["zcta"].nunique()

In [9]:
noise_complaints_covid.to_csv("notebook_1.3_noise_complaints_covid.csv", index=False)

In [10]:
if TESTING:
    noise_complaints_covid.loc[len(noise_complaints_covid)] = \
        [pd.to_datetime("2020-02-18"), "Noise testing", "noise testing", "11211"]

In [102]:
grouper = noise_complaints_covid.groupby([
    "zcta", pd.Grouper(freq='1Y', key="date"),
], as_index=False).size()
grouper = grouper.pivot(index="zcta", columns="date", values="size")
for col in grouper.columns:
    print(f"Year {col.year} has {grouper[col].isna().sum()} zcta's with no complaints")
grouper = grouper.fillna(0)
grouper.columns = ["2019", "2020"]

Year 2019 has 4 zcta's with no complaints
Year 2020 has 8 zcta's with no complaints


In [103]:
zcta_year_null = grouper.loc[(grouper["2019"] == 0)].index

In [104]:
grouper["percent_diff"] = (grouper["2020"] - grouper["2019"])
grouper["percent_diff"] /= grouper["2019"]
grouper = grouper.merge(census, how="inner", on="zcta")
grouper["percent_diff"].replace(np.inf, np.nan, inplace=True)
grouper.head(2)

Unnamed: 0,zcta,2019,2020,percent_diff,median_earning,full_time_median,full_time_mean
0,10001,1487.0,1498.0,0.007397,74878.0,93452.0,136402.0
1,10002,3002.0,3901.0,0.299467,37348.0,55285.0,75269.0


In [105]:
grouper.max()

zcta                 11697.000000
2019                  5692.000000
2020                 43725.000000
percent_diff            24.347826
median_earning      162650.000000
full_time_median    210853.000000
full_time_mean      263867.000000
dtype: float64

In [106]:
grouper.sort_values("median_earning", ascending=False, inplace=True)
grouper.reset_index(inplace=True)

In [107]:
grouper_subset = grouper.query("""zcta not in @zcta_year_null \
    and median_earning == median_earning""")

In [4]:
import plotly.express as px
px.scatter(grouper_subset, x="percent_diff", y="median_earning", hover_name="zcta")

NameError: name 'hi' is not defined

In [5]:
hi
zcta_remove = list(zcta_year_null) + [11109, 10466]
grouper_subset2 = grouper.query("""zcta not in @zcta_remove \
    and median_earning == median_earning""")
fig = px.bar(grouper_subset2, y="median_earning", color="percent_diff", 
        color_continuous_scale=px.colors.diverging.RdBu,
        color_continuous_midpoint=0, hover_name="zcta"
)
fig.update_layout(title_text="Median earning color coded by 2020% difference from 2019")

NameError: name 'hi' is not defined

In [110]:
fig.write_image("1.3-Noise_complaints_time_wealth_bar_plot.png")

In [86]:
geojson_link = "https://data.cityofnewyork.us/api/geospatial/pri4-ifjk?method=export&format=GeoJSON"

import json
from urllib.request import urlopen

with urlopen(geojson_link) as response:
    geojson = json.load(response)

In [6]:
hi
fig = px.choropleth_mapbox(
    grouper_subset2, geojson=geojson, locations='zcta', 
    color="percent_diff",
    color_continuous_scale=px.colors.diverging.RdBu,
    color_continuous_midpoint=0, 
    featureidkey="properties.modzcta", 
    labels={"percent_diff": "% difference 2019 to 2020"},
    mapbox_style="carto-positron",
    zoom=9, center={"lat": 40.7, "lon": -74},
    opacity=0.7, title="% difference in noise complaints due to covid per ZCTA",
    hover_name="zcta"
)
fig.update_layout(margin={"r":0,"t":30,"l":0,"b":0})
fig.show()

NameError: name 'hi' is not defined

In [116]:
fig.write_image("1.3-Noise_complaints_time_wealth_choropleth.png")