In [2]:
# Dependencies and Setup
%matplotlib inline
from config import gkey
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import gmaps
import json
import scipy.stats as st
import requests
from scipy.stats import linregress
from sklearn import linear_model

# Register the Google API key (imported from config.py) with gmaps
gmaps.configure(api_key=gkey)

# Suppress every Python warning for the remainder of the session
import warnings
warnings.filterwarnings(action='ignore')

# Raw datasets, read from the Resources folder next to the notebook
potholes = pd.read_csv(
    "Resources/311_Service_Requests_-_Pot_Holes_Reported_-_Historical-2016.csv"
)
crime = pd.read_csv(
    "Resources/Crimes_-_2001_to_present(2016).csv"
)
parks = pd.read_csv(
    "Resources/CPD_Parks.csv"
)
socio = pd.read_csv(
    "Resources/Census_Data_-_Selected_socioeconomic_indicators_in_Chicago__2008___2012.csv"
)

In [None]:
#JP Read CSVs
# Filtered crime and park extracts. Paths are relative to the notebook's
# working directory (same convention as the other read_csv calls in this
# notebook) so the cell runs on any checkout, not just one user's machine.
crime_141516_data = pd.read_csv("Resources/crime_141516_filtered.csv")
crime_14_data = pd.read_csv("Resources/Crimes_2014_filtered.csv")
crime_15_data = pd.read_csv("Resources/Crimes_2015_filtered.csv")
# Original (misspelled) name kept as an alias in case other cells refer to it.
crim_de_15_data = crime_15_data
crime_16_data = pd.read_csv("Resources/Crimes_2016_filtered.csv")
park_data = pd.read_csv("Resources/cpd_parks_filtered.csv")

In [None]:
#JP Parks Data
# Grouped view of the parks by ward (not used by the cells visible here; kept
# in case later cells rely on it).
park_acres = park_data.groupby(['WARD'])

# Total park acreage per ward, with WARD turned back into a regular column.
park_acres_ward_sum = park_data.groupby('WARD')[['ACRES']].sum()
park_acres_ward_sum_indexed = park_acres_ward_sum.reset_index()

# Rename WARD/ACRES so the ward column matches the crime data ('Ward').
park_acres_ward_sum_indexed.columns = ['Ward', 'Acres']

# Drop the first row (index label 0 after reset_index).
# NOTE(review): presumably a placeholder/invalid ward - confirm against the data.
park_acres_ward_sum_indexed2 = park_acres_ward_sum_indexed.drop([0])

# Bar positions and tick locations must be sized from the frame that is
# actually plotted (after the drop); sizing them from the un-dropped frame
# gives one position too many and makes plt.bar / plt.xticks fail.
x_axis = np.arange(len(park_acres_ward_sum_indexed2))
tick_locations = [value + 0.4 for value in x_axis]

In [None]:
#JP Parks bar chart
# Compute the bar positions here, from the frame being plotted, so the cell
# does not depend on whichever cell last assigned x_axis / tick_locations.
x_axis = np.arange(len(park_acres_ward_sum_indexed2))
tick_locations = [value + 0.4 for value in x_axis]

plt.figure(figsize=(20,3))
plt.bar(x_axis, park_acres_ward_sum_indexed2["Acres"], color='r', alpha=0.5, align="edge")
plt.xticks(tick_locations, park_acres_ward_sum_indexed2["Ward"], rotation="vertical")

# Set a Title and labels
plt.title("PARK ACREAGE BY WARD")
plt.xlabel("WARD")
plt.ylabel("PARK ACREAGE")

# Save the chart to the notebook's working directory
plt.savefig("acre_ward.png")

In [None]:
#JP Crime DFs
# Number of 2016 crime records per ward: count the non-null 'Primary Type'
# entries within each ward.
crime_16_data_count = (
    crime_16_data
    .groupby('Ward')[['Primary Type']]
    .count()
)
# Same counts with Ward as an ordinary column instead of the index
crime_16_data_count_indexed = crime_16_data_count.reset_index()

# Inner join of crime counts and park acreage on the shared Ward column
merge_pc_2016 = crime_16_data_count_indexed.merge(
    park_acres_ward_sum_indexed2,
    on="Ward",
)

In [None]:
#JP Crimes bar chart
# One bar per ward; ticks sit 0.4 to the right of each bar's left edge
x_axis = np.arange(len(crime_16_data_count_indexed))
tick_locations = (x_axis + 0.4).tolist()

plt.figure(figsize=(20,3))
plt.bar(
    x_axis,
    crime_16_data_count_indexed["Primary Type"],
    color='r',
    alpha=0.5,
    align="edge",
)
plt.xticks(
    tick_locations,
    crime_16_data_count_indexed["Ward"],
    rotation="vertical",
)

# Title and axis labels
plt.title("TOP 5 CRIMES COUNT BY WARD")
plt.xlabel("WARD")
plt.ylabel("CRIME COUNT")

# Write the chart to disk
plt.savefig("Top5Crime.png")
#plt.show()

In [None]:
#JP Combined bar chart
# Overlays 2016 crime counts and park acreage for every ward present in both
# datasets (merge_pc_2016 is the inner join on Ward).

# Set x axis and tick locations
x_axis = np.arange(len(merge_pc_2016))
tick_locations = [value+0.4 for value in x_axis]

plt.figure(figsize=(20,3))
# Both series share one y-axis and are drawn semi-transparent at the same x
# positions, so the two bars for a ward overlap.
plt.bar(x_axis, merge_pc_2016["Primary Type"], color='r', alpha=0.5, align="edge", label='Crime Count')
plt.bar(x_axis, merge_pc_2016["Acres"], color='b', alpha=0.5, align="edge", label='Park Acreage')

plt.xticks(tick_locations, merge_pc_2016["Ward"], rotation="vertical")

# Set a Title and labels (the y-axis carries two different units, see legend)
plt.title("CRIME COUNT AND PARK ACREAGE BY WARD")
plt.xlabel("WARD")
plt.ylabel("CRIME COUNT / PARK ACRES")
plt.legend()
plt.show()

In [None]:
#YZ set up

# Files to Load
population = pd.read_csv("Resources/Census-Data-by-Chicago-Community-Area-2016.csv")

# Socioeconomic indicators, prepared as one method chain so that no column is
# ever assigned onto a slice of the frame (which triggers pandas'
# SettingWithCopy warning and is not guaranteed to write through).
sociecn = (
    pd.read_csv("Resources/Census_Data_-_Selected_socioeconomic_indicators_in_Chicago__2008___2012.csv")
    # Drop the last row.
    # NOTE(review): presumably a city-wide summary row without an area number - confirm.
    .iloc[:-1]
    # Integer area numbers, renamed to the key used by the other datasets
    .astype({'Community Area Number': int})
    .rename(columns={'Community Area Number': 'Community Area'})
)

In [None]:
#YZ crime

# Get crime counts and grouped by community

# ***FBI code specifies the crime type, consider if need to use***

# Work on a two-column selection WITHOUT rebinding `crime`: later cells still
# need the full crime frame (e.g. its 'Case Number' column), and rebinding it
# here made them fail when the notebook is run top to bottom.
group_community = crime[['Ward', 'Community Area']].groupby(['Community Area'], as_index=False)

# count() tallies the non-null 'Ward' entries per community area; that tally
# is used as the crime count for the area.
crime_group = group_community.count()
crime_group = crime_group.rename(columns = {'Ward':'Crime Count'})

In [None]:
# YZ Merge datasets
# Outer-join crime counts, socioeconomic indicators and population on the
# shared 'Community Area' key, one join at a time.
combine = pd.merge(crime_group, sociecn, on='Community Area', how='outer')
combine = pd.merge(combine, population, on='Community Area', how='outer')

# Remove the area-name column, then order the remaining columns alphabetically
combine = combine.drop(columns='COMMUNITY AREA NAME')
combine = combine.reindex(columns=sorted(combine.columns))

# Adjusting crime by population (added after the sort, so it is the last column)
combine['Crime Count Adjusted'] = combine['Crime Count'].div(combine['population'])

In [None]:
#YZ Scatterplot

# Column at position 8 of `combine` is plotted as the poverty percentage
# (see the x-axis label); the y-axis is the population-adjusted crime count.
poverty_pct = combine.iloc[:, 8]
crime_rate = combine['Crime Count Adjusted']

# Pearson correlation between the two series; element 0 is the coefficient
correlation = st.pearsonr(poverty_pct, crime_rate)
pearson_r = correlation[0]
print(f"The correlation between both factors is {round(pearson_r,2)}")

# Scatter plot of the same two series
plt.scatter(poverty_pct, crime_rate)
plt.grid()
plt.ylabel('Crime Count Adjusted')
plt.xlabel('% Households Below Poverty')
plt.show()

In [None]:
#YZ getting df for heatmap
# Look up a latitude/longitude for every community via the Google Geocoding
# API and store the result next to the community name and area number.

# .copy() gives an independent frame, so the new columns are not written onto
# a slice of `combine`.
community_lnglat = combine[['Community', 'Community Area']].copy()

# Coordinates start as NaN so the columns stay numeric; rows that cannot be
# geocoded keep NaN.
community_lnglat["Lat"] = np.nan
community_lnglat["Lng"] = np.nan

base_url = "https://maps.googleapis.com/maps/api/geocode/json"

# Request parameters; the 'address' entry is replaced on each iteration
params = {"key": gkey}

# Loop through the communities and run a lat/lng search for each one
for index, row in community_lnglat.iterrows():
    community = row['Community']

    # The outer merges can leave rows without a community name; there is
    # nothing meaningful to geocode for those.
    if pd.isna(community):
        continue

    # update address key value
    params['address'] = f"{community},Chicago,Illinois"

    # make request (timeout so a stalled connection cannot hang the notebook)
    response = requests.get(base_url, params=params, timeout=10)

    # convert to json
    communities_lat_lng = response.json()

    # An empty result list would otherwise raise IndexError below; report
    # the community and move on instead.
    results = communities_lat_lng.get("results") or []
    if not results:
        print(f"No geocoding result for {community!r} "
              f"(status: {communities_lat_lng.get('status')})")
        continue

    location = results[0]["geometry"]["location"]
    community_lnglat.loc[index, "Lat"] = location["lat"]
    community_lnglat.loc[index, "Lng"] = location["lng"]

# Visualize to confirm lat lng appear
community_lnglat.head()

In [None]:
#YZ Heatmap

# Point coordinates for the layer, and the weight attached to each point
# (column at position 8 of `combine`, named `poverty` here).
locations = community_lnglat.loc[:, ['Lat', 'Lng']]
poverty = combine.iloc[:, 8]

# Build the map, attach the weighted heat layer, and display it
fig = gmaps.figure()

heat_layer = gmaps.heatmap_layer(
    locations,
    weights=poverty,
    max_intensity=28,
    point_radius=0.016,
    dissipating=False,
)

fig.add_layer(heat_layer)
fig

In [None]:
#VC dfs
# Builds x (crime records per community area) and y (pothole requests per
# community area) as two Series that share the same Community Area index, so
# every (x, y) pair refers to the same area.

#pothole df: non-null counts of every column per community area
potholes_df = potholes.groupby(['Community Area']).count().reset_index()
# Drop the first row.
# NOTE(review): presumably a placeholder community area (e.g. 0) - confirm.
potholes_df = potholes_df.drop(potholes_df.index[0])
pothole_counts = potholes_df.set_index('Community Area')['CREATION DATE']

#crime df
crime_df = crime.groupby(['Community Area']).count()
# size() counts rows per area and does not depend on a particular column
# being present in `crime`.
crime_counts = crime.groupby('Community Area').size()

# Keep only the areas present in both datasets. Previously x and y were paired
# by position even though they came from frames with different rows.
paired_counts = pd.concat(
    {'crime': crime_counts, 'potholes': pothole_counts},
    axis=1,
    join='inner',
)
x = paired_counts['crime']
y = paired_counts['potholes']

In [None]:
#VC scatterplot
# Scatter of x against y with the least-squares regression line on top.

plt.scatter(x,y)

# linregress is imported at the top of the notebook (from scipy.stats);
# the name `stats` was never imported and raised NameError here.
slope, intercept, r_value, p_value, std_err = linregress(x,y)
line = slope*x+intercept

# The points are already drawn by plt.scatter, so only the fitted line is added
plt.plot(x, line, 'r-')

In [None]:
#VC stats
# Covariance matrix and Pearson correlation of x and y. The bare names `cov`
# and `pearsonr` were never imported; use the numpy / scipy.stats modules
# that the notebook already imports as `np` and `st`.

# 2x2 covariance matrix of the two series
covariance=np.cov(x,y)

# pearsonr returns (coefficient, p-value); only the coefficient is reported
corr, _ = st.pearsonr(x, y)
print('Pearsons correlation: %.3f' % corr)

In [None]:
#DS df & scatterplot
# Pothole request counts per community area against per-capita income.

group_potholes = potholes.groupby("Community Area").count()

# Non-null 'NUMBER OF POTHOLES FILLED ON BLOCK' entries per community area
pothole_counts = group_potholes['NUMBER OF POTHOLES FILLED ON BLOCK']

# The y-axis is labelled as income, so plot the income column rather than the
# community area number. The header is matched after stripping whitespace.
# NOTE(review): assumes the census file has a 'PER CAPITA INCOME' column
# (possibly with trailing whitespace) - confirm against the CSV.
income_cols = [col for col in socio.columns if col.strip().upper() == 'PER CAPITA INCOME']
if not income_cols:
    raise KeyError("socio has no 'PER CAPITA INCOME' column")
income_by_area = (
    socio.dropna(subset=['Community Area Number'])
    .set_index('Community Area Number')[income_cols[0]]
)

# Align both series on the community area number and keep only areas present
# in both, so each point pairs the two values of one area.
potholes_income = pd.concat(
    {'potholes': pothole_counts, 'income': income_by_area},
    axis=1,
    join='inner',
)

# Plot out potholes v socioeconomic status by community area
x_values = potholes_income['potholes']
y_values = potholes_income['income']
plt.scatter(x_values,y_values)
plt.xlabel('Potholes(filled)')
plt.ylabel('Average Income ($)')
plt.show()

In [None]:
#DS Linear Regression
# Least-squares fit of per-capita income on pothole request counts per
# community area, drawn over the scatter plot.

# Non-null 'NUMBER OF POTHOLES FILLED ON BLOCK' entries per community area
pothole_counts = group_potholes['NUMBER OF POTHOLES FILLED ON BLOCK']

# Use the income column (the axis is labelled as income), not the area number.
# NOTE(review): assumes the census file has a 'PER CAPITA INCOME' column
# (possibly with trailing whitespace) - confirm against the CSV.
income_cols = [col for col in socio.columns if col.strip().upper() == 'PER CAPITA INCOME']
if not income_cols:
    raise KeyError("socio has no 'PER CAPITA INCOME' column")
income_by_area = (
    socio.dropna(subset=['Community Area Number'])
    .set_index('Community Area Number')[income_cols[0]]
)

# Align on community area so both series have the same length and order
potholes_income = pd.concat(
    {'potholes': pothole_counts, 'income': income_by_area},
    axis=1,
    join='inner',
)
x_values = potholes_income['potholes']
y_values = potholes_income['income']

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
# Position the equation relative to the axes so it stays visible whatever the
# data range is.
plt.annotate(line_eq,(0.05,0.85),xycoords="axes fraction",fontsize=15,color="red")
plt.xlabel('Potholes(reported)')
plt.ylabel('Average Income ($)')
# linregress returns the correlation coefficient r; r-squared is its square
print(f"The r-squared is: {rvalue**2}")
plt.show()

In [11]:
#MK dfs

# Keep only pothole records that have a fill count and both coordinates
cleanpotholes = potholes.dropna(
    subset=[
        'NUMBER OF POTHOLES FILLED ON BLOCK',
        'LATITUDE',
        'LONGITUDE',
    ]
)

# Coordinates of the remaining records, and the fill count of each as a float
potlocations = cleanpotholes.loc[:, ["LATITUDE", "LONGITUDE"]]
potholenumber = cleanpotholes["NUMBER OF POTHOLES FILLED ON BLOCK"].astype(float)

In [18]:
#MK heatmap
# Pothole heat map: one weighted point per cleaned pothole record.
fig2 = gmaps.figure()

# Heat layer weighted by the number of potholes filled on each block
heat_layer2 = gmaps.heatmap_layer(
    potlocations,
    weights=potholenumber,
    max_intensity=10,
    point_radius=.0015,
    dissipating=False,
)

# Attach the layer to the map
fig2.add_layer(heat_layer2)

# Render the map as the cell output
fig2

Figure(layout=FigureLayout(height='420px'))