# Contains all graphs/data that could not fit into the report

In [None]:
from pathlib import Path

# Path fragments for the processed crime CSV. They are kept as plain strings
# because the loading cell joins them by concatenation, in this order:
# parent directory, data directory, sub-directory, file stem, extension.
PARENT_PATH = f"{Path().resolve().parent}/"
PATH, SUBPATH = "data/", "processed/"
FILE, FORMAT = "chicago_crimes-20230130-1108", ".csv"

In [None]:
# Altair is the charting library used for every plot in this notebook.
import altair as alt
from vega_datasets import data
# Turn off Altair's max-rows check so the full dataframe can be handed to a chart.
alt.data_transformers.disable_max_rows()

import warnings
# Suppress FutureWarning messages to keep the cell output readable.
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
import pandas as pd

# Read the processed crime CSV into the dataframe used by all cells below.
# The path is the plain concatenation of the five string fragments.
df = pd.read_csv(f"{PARENT_PATH}{PATH}{SUBPATH}{FILE}{FORMAT}")

In [None]:
# GeoPandas is used to load the Chicago ward shapefile for the map charts
import geopandas as gpd

In [None]:
# Re-point the path fragments at the external ward shapefile (this overwrites
# the values that were used for the crime CSV) and load it with GeoPandas.
PARENT_PATH = f"{Path().resolve().parent}/"
PATH, SUBPATH = "data/", "external/"
FILE, FORMAT = "wards", ".shp"

gdf = gpd.read_file(f"{PARENT_PATH}{PATH}{SUBPATH}{FILE}{FORMAT}")

In [None]:
# Preview the first five rows of the crime dataframe.
df.head(n=5)

In [None]:
# District crosstab, in percent of all records (not easy to read).
# NOTE(review): both axes are the same column, so only the diagonal (and the
# margins) can be non-zero -- confirm whether a second variable was intended.
cross_table = pd.crosstab(
    index=df["district"],
    columns=df["district"],
    rownames=["District"],
    colnames=["Ergebnis"],
    margins=True,
    normalize=True,
).mul(100)

cross_table

In [None]:
# Crime type vs. arrest flag: each cell is its share of all records in
# percent, with row/column totals added as margins.
cross_table = pd.crosstab(
    index=df["primary_type"],
    columns=df["arrest"],
    rownames=["Crime"],
    colnames=["Arrest"],
    margins=True,
    normalize=True,
).mul(100)

cross_table

In [None]:
# Homicide (True/False) vs. arrest flag, as a share of all records in percent,
# with row/column totals added as margins.
cross_table = pd.crosstab(
    index=df["primary_type"].eq("homicide"),
    columns=df["arrest"],
    rownames=["Crime"],
    colnames=["Arrest"],
    margins=True,
    normalize=True,
).mul(100)

cross_table

In [None]:
# Crime group vs. arrest flag, as a share of all records in percent, with
# row/column totals added as margins.
cross_table = pd.crosstab(
    index=df["primary_group"],
    columns=df["arrest"],
    rownames=["Group"],
    colnames=["Arrest"],
    margins=True,
    normalize=True,
).mul(100)

cross_table

In [None]:
# Horizontal bars: number of homicide records for each value of `arrest`.
homicide = (
    alt.Chart(df)
    # Keep only the rows whose primary_type is "homicide".
    .transform_filter(
        alt.FieldEqualPredicate(field='primary_type', equal="homicide")
    )
    .mark_bar()
    .encode(x="count(primary_type)", y="arrest")
    .configure_axis(grid=False)
    .configure_view(strokeOpacity=0)
)

homicide

In [None]:
# The five wards with the highest number of recorded crimes.
df["ward"].value_counts().nlargest(n=5, keep="first")

In [None]:
# Crime count per ward, sorted by count, with click-to-highlight.
# Clicking a bar selects that ward; the selection is keyed on the `ward` field.
selection = alt.selection_multi(fields=['ward'])

# Rows without a ward are dropped so they do not form their own bar.
chart_2 = alt.Chart(df[df['ward'].notna()]).mark_bar(size=20).encode(
    x=alt.X('ward:N',
            sort="-y",
            axis=alt.Axis(title="Ward",
                          titleAnchor="start",
                          labelAngle=0)),
    y=alt.Y('count(primary_type)',
            axis=alt.Axis(title="Count",
                          titleAnchor="end")),
    # Previously the selection was attached to the chart but no encoding read
    # it, so clicking had no visible effect. Dim the unselected bars, the same
    # way the district chart in this notebook does.
    opacity=alt.condition(selection, alt.value(1.0), alt.value(0.10)),
    tooltip="ward"
).properties(
    title='Count of commited crime in the wards',
    width=1500,
    height=400
).add_selection(
    selection
)


# Chart-level styling is applied last, on the displayed copy only.
chart_2.configure_title(
    fontSize=16,
    font='Arial',
    color='black',
    anchor='start'
).configure_axis(grid=False
).configure_view(strokeOpacity=0)

In [None]:
# Crime count per ward, sorted by count, with five wards highlighted.
# NOTE(review): the highlighted wards are hard-coded; presumably they are the
# top five from the value_counts cell -- verify if the data changes.
ward_5 = (
    alt.Chart(df[df['ward'].notna()])
    .mark_bar(size=20)
    .encode(
        x=alt.X(
            'ward:N',
            sort="-y",
            axis=alt.Axis(title="Ward", titleAnchor="start", labelAngle=0),
        ),
        y=alt.Y(
            'count(primary_type)',
            axis=alt.Axis(title="Count", titleAnchor="end"),
        ),
        color=alt.condition(
            # True when the bar's ward is one of the listed wards ...
            alt.FieldOneOfPredicate(
                field='ward', oneOf=[42.0, 28.0, 24.0, 27.0, 6.0]
            ),
            alt.value('orange'),     # ... then the bar is orange,
            alt.value('steelblue'),  # otherwise steelblue.
        ),
    )
    .properties(
        title='Count of commited crime in the wards',
        width=1500,
        height=400,
    )
)

ward_5.configure_title(fontSize=16, font='Arial', color='black', anchor='start')

In [None]:
# Normalized stacked area: share of each primary_group per hour.
# Not usable because of group_3.
(
    alt.Chart(df)
    .mark_area()
    .encode(
        x=alt.X("hour:N"),
        y=alt.Y("count(primary_group)", stack="normalize"),
        color=alt.Color("primary_group:N"),
    )
    .properties(width=800, height=600)
    .configure_axis(grid=False)
    .configure_view(strokeOpacity=0)
)

In [None]:
# Normalized stacked bars: share of each primary_group within every district
# (not easy to read).
(
    alt.Chart(df)
    .mark_bar()
    .encode(
        x=alt.X('count(primary_group)', stack="normalize"),
        y=alt.Y('district:N'),
        color=alt.Color('primary_group'),
        tooltip=["primary_group"],
    )
    .properties(
        title='Distribution of primary_group per district',
        width=1000,
        height=800,
    )
    .configure_axis(grid=False)
    .configure_view(strokeOpacity=0)
)

In [None]:
# Crime count per district, with a dropdown that highlights one district.

# Build the dropdown options from the districts present in the data.
# pandas represents missing values as NaN rather than None, so an
# `is not None` check would let them through; NaN then breaks the sort order
# and could end up as a menu entry or as the initial value. dropna() removes
# them, and tolist() yields plain Python numbers for the chart spec.
districts = sorted(df['district'].dropna().unique().tolist())

selectDistrict = alt.selection_single(
    name='Select', # name the selection 'Select'
    fields=['district'], # the selection is keyed on the district field
    init={'district': districts[0]}, # start with the lowest district selected
    bind=alt.binding_select(options=districts) # bind to a menu of the district values
)


chart = alt.Chart(df).mark_bar().add_selection(
    selectDistrict
).encode(
    x=alt.X("district:N",
    axis=alt.Axis(title="DISTRICT",
                          titleAnchor="start",
                          labelAngle=0)),
    y=alt.Y("count(primary_type)",
    axis=alt.Axis(title="COUNT",
                          titleAnchor="end")),
    # The selected district is fully opaque, all others are dimmed.
    opacity=alt.condition(selectDistrict, alt.value(1.0), alt.value(0.10)),
    tooltip=["district", "count(primary_type)"]
).properties(
    title='Count of commited crime per district',
    width=1000,
    height=400
).configure_title(
    fontSize=16,
    font='Arial',
    color='black',
    anchor='start'
).configure_axis(grid=False
).configure_view(strokeOpacity=0)



chart

In [None]:
# Map of Chicago (ward shapes) with crime locations drawn on top as squares.

# Base layer: ward geometries, black fill with white outlines.
choro = (
    alt.Chart(gdf)
    .mark_geoshape(fill="black", stroke='white')
    .encode()
)

# Overlay: one small square per location, coloured by the record count.
p = (
    alt.Chart(df)
    .mark_square(opacity=0.3)
    .encode(
        longitude='longitude',
        latitude='latitude',
        size=alt.value(10),
        color=alt.Color("count(primary_group)"),
        tooltip=["district", "block"],
    )
    .properties(
        title="Location of crimes in Chicago City",
        width=1000,
        height=1000,
    )
)

choro + p

In [None]:
# Crime locations with an interval brush, plus a bar chart of primary_group
# counts restricted to the brushed points.
brush = alt.selection(type='interval')

points = alt.Chart(df).mark_square(opacity=0.3).encode(
    longitude='longitude',
    latitude='latitude',
    size=alt.value(10),
    # Points inside the brush keep their group colour; the rest turn grey.
    color=alt.condition(brush, "primary_group", alt.value("lightgrey")),
    tooltip=["district", "block"]
).add_selection(
    brush
).properties(
    title="Location of crimes in Chicago City",
    width=1000,
    height=1000
)

bars = alt.Chart(df).mark_bar().encode(
    y='primary_group:N',
    # Fixed field name: this was misspelled 'primary_gropup', which is not a
    # column, so the bars were not coloured by group.
    color='primary_group:N',
    x='count(primary_group):Q'
).transform_filter(
    # Count only the records selected by the brush in the map above.
    brush
)

# Stack the map above the bar chart.
points & bars

In [None]:
# Map of Chicago with crime locations, filterable by primary_group through
# radio buttons.

# Base layer: ward geometries, white fill with grey outlines.
choro = (
    alt.Chart(gdf)
    .mark_geoshape(fill="white", stroke='grey')
    .encode()
)

# Radio buttons bound to a single-value selection on primary_group.
input_radio = alt.binding_radio(
    options=['group_1', 'group_2', 'group_3'],
    name='Select_Group: ',
)
selection = alt.selection_single(fields=['primary_group'], bind=input_radio)

# Overlay: one square per record; only records matching the selection remain.
p = (
    alt.Chart(df)
    .mark_square(opacity=0.3)
    .encode(
        longitude='longitude',
        latitude='latitude',
        size=alt.value(10),
        color=alt.Color("primary_group:N"),
        tooltip=["district", "block", "primary_type"],
    )
    .add_selection(selection)
    .transform_filter(selection)
    .properties(
        title="Location of crimes in Chicago City",
        width=1000,
        height=1000,
    )
)

layer = (
    alt.layer(choro + p)
    .configure_title(fontSize=16, font='Arial', color='black', anchor='start')
    .configure_axis(grid=False)
    .configure_view(strokeOpacity=0)
)

layer


In [None]:
# Crime count per district, sorted by count, with five districts highlighted.
# NOTE(review): the highlighted districts are hard-coded -- verify them if the
# data changes.
district_5 = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        x=alt.X(
            "district:N",
            sort="-y",
            axis=alt.Axis(title="DISTRICT", titleAnchor="start", labelAngle=0),
        ),
        y=alt.Y(
            "count(primary_type):Q",
            axis=alt.Axis(title="COUNT", titleAnchor="end"),
        ),
        color=alt.condition(
            # True when the bar's district is one of the listed districts ...
            alt.FieldOneOfPredicate(field='district', oneOf=[11, 6, 8, 1, 18]),
            alt.value('orange'),     # ... then the bar is orange,
            alt.value('steelblue'),  # otherwise steelblue.
        ),
        tooltip=["count(primary_type)"],
    )
    .properties(
        title='Count of commited crime in the districts',
        width=1000,
        height=400,
    )
    .configure_title(fontSize=16, font='Arial', color='black', anchor='start')
    .configure_axis(grid=False)
    .configure_view(strokeOpacity=0)
)

district_5

In [None]:
# Normalized stacked bars: share of each crime type within every district.

# Fixed order in which the crime types appear in the colour scale.
order_crime = [
    "theft",
    "assault_and_battery",
    "criminal_damage",
    "deceptive_practice",
    "burglary",
    "other_offense",
    "robbery_and_weapons",
    "narcotics",
    "homicide",
    "sexual_crime",
]

(
    alt.Chart(df)
    .mark_bar()
    .encode(
        x=alt.X(
            'count(primary_type)',
            stack="normalize",
            axis=alt.Axis(format="%", title="PERCENT", titleAnchor="start"),
        ),
        y=alt.Y(
            'district:N',
            axis=alt.Axis(title="DISTRICT", titleY=25),
        ),
        color=alt.Color('primary_type', sort=order_crime),
        tooltip=[
            "primary_type",
            alt.Tooltip('count(primary_type)', title='count'),
        ],
    )
    .properties(
        title='Distribution of crime types per district',
        width=1000,
        height=800,
    )
    .configure_title(fontSize=16, font='Arial', color='black', anchor='start')
    .configure_axis(grid=False)
    .configure_view(strokeOpacity=0)
)

In [None]:
# group_1 records on block "state_st" in district 1
# (author's note: 951 cases for state street in district 1).
display(
    df.loc[
        df['district'].eq(1)
        & df['block'].eq("state_st")
        & df['primary_group'].eq("group_1")
    ]
)

In [None]:
# All records on block "ashland_ave".
# Author's noted figures for other blocks (presumably record counts -- verify):
# kedzie_ave 92, pulaski_rd 213, western_ave 27, madison_st 274
display(df.loc[df['block'].eq("ashland_ave")])