In [None]:
import geopandas as gpd
import altair as alt
import seaborn as sns
import pandas as pd
import scipy.stats as stats

# Create a string holding the path to the folder where exported graphs are meant to go.
# NOTE(review): this is an absolute, machine-specific path and no cell visible in this
# notebook reads `export_folder` — confirm whether it is still needed.
export_folder = r'C:\Users\Kaninik\OneDrive - Delft University of Technology\MSc Cosem\Q3\Gedeelde documenten - Group 4 - Data Analytics\General\Report Part 1\Graphs'

In [None]:

import pickle

# Load the pickled dataset into `data`.
# SECURITY: unpickling can execute arbitrary code, so only load a `data.pickle`
# that was produced by this project / a trusted source.
with open('data.pickle', 'rb') as f:
    # No `encoding` argument: it only affects byte strings pickled by Python 2, and the
    # previously passed value 'binary' is not a registered codec name (it would raise
    # LookupError as soon as such a string had to be decoded).
    data = pickle.load(f)


In [None]:
# # Optional filter (currently disabled): keep only records whose OCC year is 2016
# # and drop the VIC_SEX codes 'D' and 'E'.
# # NOTE(review): the original note said 2015 while the code below filters on 2016 — confirm.
# data = data.loc[data['OCC'].dt.year == 2016]
# data = data.loc[data['VIC_SEX'] != 'D']
# data = data.loc[data['VIC_SEX'] != 'E']
# data.info()


# Add a 'Year' column holding the calendar year taken from the OCC datetime column.
data['Year'] = data['OCC'].dt.year

In [None]:
# Number of records in `data` for each precinct code (ADDR_PCT_CD), one row per precinct.
crime_per_precinct = (
    data
    .groupby(['ADDR_PCT_CD'])
    .size()
    .reset_index(name='Count')
)

# Box plot of how those per-precinct totals are distributed (drawn with Altair).
# The variable keeps its "bar_plot" name even though the mark is a box plot.
bar_plot_precincts = (
    alt.Chart(crime_per_precinct)
    .mark_boxplot()
    .encode(y=alt.Y('Count:Q', title='Number of crimes'))
)

# Display the chart at 500x500; nothing is written to disk in this cell.
bar_plot_precincts.properties(width=500, height=500)

In [None]:

# Crimes per precinct and year, with explicit integer dtypes for the numeric columns.
precint_counts = (
    data
    .groupby(['ADDR_PCT_CD', 'Year'])
    .size()
    .reset_index(name='crime_count')
    .astype({'Year': int, 'crime_count': int})
)

# Precinct codes are cast to int before str so that a float code such as 1.0 becomes '1'
# rather than '1.0' (the string form is what the later merges on 'precinct' join against).
precint_counts['ADDR_PCT_CD'] = precint_counts['ADDR_PCT_CD'].astype(int).astype(str)

# Preview the result (only the last expression of a cell is displayed).
precint_counts.head(5)


In [None]:
# Distinct precinct codes found in the per-year counts; the cell displays how many there are.
precincts = pd.unique(precint_counts['ADDR_PCT_CD'])
len(precincts)

In [None]:
# Yearly crime counts drawn as one line per precinct, stacked from zero.
# NOTE(review): this was labelled a streamgraph, but the mark is a line with stack="zero"
# rather than an area mark — confirm which presentation is intended.
alt.Chart(precint_counts).mark_line().encode(
    x=alt.X('Year:O', axis=alt.Axis(title='Year')),
    y=alt.Y('crime_count:Q', stack="zero", axis=alt.Axis(title='Crime Count')),
    color=alt.Color(
        'ADDR_PCT_CD:O',
        scale=alt.Scale(scheme='lighttealblue'),
        title='Precinct',
    ),
).properties(width=1000, height=500)

In [None]:
# Read the precinct boundary polygons from the GeoJSON file.
precint_shape = gpd.read_file('..//data//Police Precincts.geojson')

# Join the boundaries onto the per-year counts (counts on the left, matched on the
# precinct id), then wrap the merged table as a GeoDataFrame using its 'geometry' column.
precint_shape = gpd.GeoDataFrame(
    precint_counts.merge(
        precint_shape,
        left_on='ADDR_PCT_CD',
        right_on='precinct',
    ),
    geometry='geometry',
)
precint_shape.info()

In [None]:
# Map of the precinct polygons, coloured by the 'precinct' id column.
# precint_shape comes from a merge with the per-year counts, so a precinct's polygon
# is present once for every year it has a count.
(
    alt.Chart(precint_shape)
    .mark_geoshape()
    .encode(color='precinct')
    .properties(width=500, height=500)
)

In [None]:
# One choropleth per year: precinct polygons coloured by that year's crime count.
# precint_shape holds a row (and polygon) for every precinct/year pair, so without the
# facet every year would be painted on top of the others in a single map and only the
# polygons drawn last would be visible.
alt.Chart(precint_shape).mark_geoshape().encode(
    color='crime_count:Q',
).properties(
    width=500,
    height=300,
).facet(
    facet='Year:O',
    columns=2,
)

In [None]:
# Read the precinct polygons that carry a 'tree_count' column.
precint_w_trees = gpd.read_file('..//data//precincts_with_trees.geojson')

# Choropleth of the precincts, coloured by 'tree_count'.
(
    alt.Chart(precint_w_trees)
    .mark_geoshape()
    .encode(color='tree_count:Q')
    .properties(width=500, height=300)
)


In [None]:
# Merge the per-year crime counts onto the tree/precinct frame, matching the frame's
# 'precinct' column against the counts' 'ADDR_PCT_CD' column.
# precint_counts has one row per precinct and year, so a precinct (presumably one row in
# the tree file — confirm) ends up repeated once per year in the result.
# NOTE(review): the cell overwrites precint_w_trees with its own merge result, so running
# it a second time without re-reading the GeoJSON merges the counts in again.
precint_w_trees = precint_w_trees.merge(precint_counts, left_on='precinct', right_on='ADDR_PCT_CD')

In [None]:
# Box plot of the 'crime_count' column only (tree counts are not part of this chart).
alt.Chart(precint_w_trees).mark_boxplot().encode(y='crime_count:Q')

In [None]:
# Scatter plot: tree count on the x axis, crime count on the y axis, one red circle per
# row of precint_w_trees; hovering shows the precinct id and both counts.
alt.Chart(
    precint_w_trees,
    title='Number of Crimes vs Number of Trees',
    width=1000,
    height=500,
).mark_circle(color='red', size=50).encode(
    x=alt.X('tree_count', axis=alt.Axis(title='Trees')),
    y=alt.Y('crime_count', axis=alt.Axis(title='Crimes')),
    tooltip=['precinct', 'tree_count', 'crime_count'],
)

In [None]:
# Pearson correlation between tree_count and crime_count over the rows of precint_w_trees.
# NOTE(review): after the merge with the per-year counts the same tree_count presumably
# appears once per year for a precinct, so the rows are not independent observations — confirm.
stats.pearsonr(precint_w_trees['tree_count'], precint_w_trees['crime_count'])

In [None]:
# Get the area of each precinct, in square metres.
# EPSG:2263 (NAD83 / New York Long Island) expresses coordinates in US survey feet, so
# `.area` on the projected geometries is in square feet. It is converted to square metres
# here so that the later division by 1,000,000 really yields square kilometres.
# One US survey foot is exactly 1200/3937 metres.
SQ_METRES_PER_SQ_US_SURVEY_FOOT = (1200 / 3937) ** 2

precint_w_trees = precint_w_trees.to_crs(epsg=2263)
precint_w_trees['area'] = precint_w_trees['geometry'].area * SQ_METRES_PER_SQ_US_SURVEY_FOOT

precint_w_trees['area'].head(5)

In [None]:
# Add tree and crime densities: each count divided by (area / 1,000,000).
# This is "per square kilometre" only when `area` is expressed in square metres.
precint_w_trees['tree_density'] = precint_w_trees['tree_count'].div(
    precint_w_trees['area'].div(1000000)
)
precint_w_trees['crime_density'] = precint_w_trees['crime_count'].div(
    precint_w_trees['area'].div(1000000)
)

In [None]:
# Scatter plot of tree density (x axis) against crime density (y axis), one red circle per
# row of precint_w_trees. The axis and chart titles name densities, because that is what
# is plotted here (the raw counts only appear in the tooltip).
fig = alt.Chart(precint_w_trees).mark_circle(
    size=50,
    color='red',
).encode(
    x=alt.X('tree_density:Q', axis=alt.Axis(title='Trees per sq.km')),
    y=alt.Y('crime_density:Q', axis=alt.Axis(title='Crimes per sq.km')),
    tooltip=['precinct', 'tree_count', 'crime_count'],
).properties(
    width=1000,
    height=500,
    title='Crime Density vs Tree Density',
)

# Overlay a regression line of crime_density on tree_density on top of the points.
final_plot = fig + fig.transform_regression('tree_density', 'crime_density').mark_line()
final_plot

In [None]:
# Pearson correlation between tree_density and crime_density over the rows of precint_w_trees.
# NOTE(review): the frame holds one row per precinct and year after the merge with the
# per-year counts, so a precinct's tree_density presumably repeats across years — confirm
# that this is the intended unit of observation.
stats.pearsonr(precint_w_trees['tree_density'], precint_w_trees['crime_density'])

In [None]:

# 3x3 scatter-plot matrix: every pairing of tree density, crime density and area,
# each panel 300x300.
alt.Chart(precint_w_trees).mark_circle().encode(
    x=alt.X(alt.repeat("column"), type='quantitative'),
    y=alt.Y(alt.repeat("row"), type='quantitative'),
).properties(width=300, height=300).repeat(
    column=['tree_density', 'crime_density', 'area'],
    row=['tree_density', 'crime_density', 'area'],
)

In [None]:
# Box plots of the two density columns, shown side by side (crime density on the left).
bplot_crimedensity = (
    alt.Chart(precint_w_trees)
    .mark_boxplot()
    .encode(y='crime_density')
)
b_plot_treedensity = (
    alt.Chart(precint_w_trees)
    .mark_boxplot()
    .encode(y='tree_density')
)

bplot_crimedensity | b_plot_treedensity

