# Boston Crimes

In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
pd.set_option('display.max_rows', None)
import datetime
from plotly.subplots import make_subplots

In [None]:
# Load the crime records and preview the first five rows.
data = pd.read_csv(
    'crime.csv',
    encoding='latin',
)
data.head(5)

In [None]:
def treemap(categories, title, path, values):
    """Build a Plotly Express treemap and display it.

    Args:
        categories: Data frame handed to ``px.treemap``.
        title: Figure title.
        path: Forwarded unchanged as the treemap ``path`` argument.
        values: Forwarded unchanged as the treemap ``values`` argument.

    Returns:
        None. The figure is shown with ``fig.show()``.
    """
    treemap_options = dict(
        path=path,
        values=values,
        height=700,
        title=title,
        color_discrete_sequence=px.colors.sequential.RdBu,
    )
    fig = px.treemap(categories, **treemap_options)

    # Show label, text and value on the first trace of the figure.
    first_trace = fig.data[0]
    first_trace.textinfo = 'label+text+value'

    fig.show()

In [None]:
def histogram(data, path, color, title, xaxis: str, yaxis: str):
    """Build a Plotly Express histogram and display it.

    Args:
        data: Data frame handed to ``px.histogram``.
        path: Passed as the histogram's ``x`` argument.
        color: Passed as the histogram's ``color`` argument.
        title: Figure title.
        xaxis: Title of the x axis.
        yaxis: Title of the y axis.

    Returns:
        None. The figure is shown with ``fig.show()``.
    """
    layout_options = {
        'title_text': title,
        'xaxis_title_text': xaxis,
        'yaxis_title_text': yaxis,
        'bargap': 0.2,
        'bargroupgap': 0.1,
    }
    fig = px.histogram(data, x=path, color=color)
    fig.update_layout(**layout_options)
    fig.show()

In [None]:
def bar(categories, x, y, color, title, xlab, ylab):
    """Build a Plotly Express bar chart (height 400) and display it.

    Args:
        categories: Data frame handed to ``px.bar``.
        x: Passed as the chart's ``x`` argument.
        y: Passed as the chart's ``y`` argument.
        color: Passed as the chart's ``color`` argument.
        title: Figure title.
        xlab: Title of the x axis.
        ylab: Title of the y axis.

    Returns:
        None. The figure is shown with ``fig.show()``.
    """
    layout_options = {
        'title_text': title,
        'xaxis_title_text': xlab,
        'yaxis_title_text': ylab,
        'bargap': 0.2,
        'bargroupgap': 0.1,
    }
    fig = px.bar(categories, x=x, y=y, color=color, height=400)
    fig.update_layout(**layout_options)
    fig.show()

In [None]:
# Count the records in each offense group and tabulate the result
Number_crimes = data['OFFENSE_CODE_GROUP'].value_counts()
values = Number_crimes.values
categories = pd.DataFrame({
    "OFFENSE_CODE_GROUP": Number_crimes.index,
    "values": values,
})
categories.head(5)

In [None]:
# Treemap of the offense groups, using the per-group counts as values
treemap(
    categories,
    title='Major Crimes in Boston',
    path=['OFFENSE_CODE_GROUP'],
    values=categories['values'],
)

In [None]:
# Histogram of records per offense group, coloured by the same column
histogram(
    data,
    path="OFFENSE_CODE_GROUP",
    color="OFFENSE_CODE_GROUP",
    title='Major Crimes in Boston',
    xaxis='Crime',
    yaxis='Count',
)

In [None]:
# Top 10 offense groups. `categories` is built from value_counts(), which
# returns counts in descending order, so the first ten rows are the ten most
# frequent groups.
top10_categories = categories.head(10)

# Pass the 10-row frame together with column names (rather than the full
# frame plus separately sliced Series) so the data frame and the plotted
# columns always have matching lengths and are resolved by name.
bar(
    top10_categories,
    'OFFENSE_CODE_GROUP',
    'values',
    'OFFENSE_CODE_GROUP',
    'Top 10 Major Crimes in Boston',
    'Crime',
    'Count',
)

In [None]:
# Count the records per year and tabulate the result
Number_crimes_year = data['YEAR'].value_counts()
years = pd.DataFrame({
    "YEAR": Number_crimes_year.index,
    "values": Number_crimes_year.values,
})
years

In [None]:
# Pie chart of the per-year counts. A title is set so the figure can be
# understood on its own when the notebook is skimmed.
fig = px.pie(
    years,
    values='values',
    names='YEAR',
    title='Share of Crimes per Year',
    color_discrete_sequence=px.colors.sequential.RdBu,
)
fig.show()

In [None]:
# Records per district, with the bars split by month. The y axis is a plain
# count of records, so it is labelled as such (the previous label,
# "Crimes Count on each Day", did not describe this plot).
histogram(
    data,
    path="DISTRICT",
    color="MONTH",
    title='Crime count per Month on each district',
    xaxis='District',
    yaxis='Crimes Count',
)

### Exercise 1
Use any plot type you like for this exercise; you may use different plot types for different questions.



1. Which months have the highest crime rate? Plot the data.


In [None]:
# Number of records per month; sort_index() orders the rows by month value
# instead of by count, so the bars appear in ascending month order.
series_crimes = data["MONTH"].value_counts().sort_index()
df_crimes = pd.DataFrame({'MONTH': series_crimes.index, 'COUNT': series_crimes.values})

# Title and y label describe what is plotted: a count per month (no district
# or per-day breakdown is involved here).
bar(df_crimes, "MONTH", "COUNT", 'MONTH', 'Crime count per Month', 'Month', 'Crimes Count')


2. Which days of the week have the highest crime rate? Plot the data.


In [None]:
# Number of records per day of the week, most frequent day first
series_weekday_crimes = data['DAY_OF_WEEK'].value_counts()
df_crimes = (
    series_weekday_crimes
    .rename_axis('DAY_OF_WEEK')
    .reset_index(name='COUNT')
)
bar(
    df_crimes,
    "DAY_OF_WEEK",
    "COUNT",
    'DAY_OF_WEEK',
    'Crime count on each Day',
    'Day',
    'Crimes Count on each Day',
)


3. Which districts have the highest crime rate? Plot the data.


In [None]:
# Number of records per district, most frequent district first.
# Named for what it holds: this is a per-district count, so it no longer
# reuses (and overwrites) the per-weekday variable names.
series_district_crimes = data['DISTRICT'].value_counts()
df_district_crimes = pd.DataFrame({
    'DISTRICT': series_district_crimes.index,
    'COUNT': series_district_crimes.values,
})
bar(df_district_crimes, "DISTRICT", "COUNT", 'DISTRICT', 'Crime count on each DISTRICT', 'District', 'Crimes Count')


4. Plot the crime rate per district per day of week.

In [None]:
# Records per district, with the bars split by day of the week
histogram(
    data,
    path="DISTRICT",
    color="DAY_OF_WEEK",
    title='Crime count per Day on each district',
    xaxis='District',
    yaxis='Crimes Count',
)

### Map 

In [None]:
%%capture
# Install folium for the map section; %%capture keeps pip's output out of the notebook.
# NOTE(review): the version is not pinned — consider pinning it for reproducibility.
%pip install folium

In [None]:
import folium
from folium import Choropleth, Circle, Marker
from folium.plugins import HeatMap, MarkerCluster


In [None]:
# Base map centred on Boston
m_1 = folium.Map(
    location=[42.32,-71.0589],
    tiles='openstreetmap',
    zoom_start=10,
)

# Render the map as the cell output
m_1

In [None]:
# Offense groups kept for the map plots
MAJOR_CRIME_GROUPS = [
    'Larceny', 'Auto Theft', 'Robbery', 'Larceny From Motor Vehicle', 'Residential Burglary',
    'Simple Assault', 'Harassment', 'Ballistics', 'Aggravated Assault', 'Other Burglary',
    'Arson', 'Commercial Burglary', 'HOME INVASION', 'Homicide', 'Criminal Harassment',
    'Manslaughter']

# Load the data and drop rows with missing locations.
# dropna() returns a new frame, so nothing is mutated in place and the cell
# gives the same result every time it is re-run.
crimes_located = (
    pd.read_csv("crime.csv", encoding='latin-1')
    .dropna(subset=['Lat', 'Long', 'DISTRICT'])
)

# Focus on major crimes
crimes = crimes_located[crimes_located.OFFENSE_CODE_GROUP.isin(MAJOR_CRIME_GROUPS)]

# Print the first five rows of the table
crimes.head()

In [None]:
# Robberies whose HOUR value is one of 9, 10, ..., 17
is_robbery = crimes['OFFENSE_CODE_GROUP'] == 'Robbery'
is_daytime = crimes['HOUR'].isin(range(9,18))
daytime_robberies = crimes[is_robbery & is_daytime]

In [None]:
# Map that receives one marker per daytime robbery
m_2 = folium.Map(
    location=[42.32,-71.0589],
    tiles='cartodbpositron',
    zoom_start=13,
)

# Place a marker at the latitude/longitude of every daytime robbery
for lat, lon in zip(daytime_robberies['Lat'], daytime_robberies['Long']):
    Marker([lat, lon]).add_to(m_2)

# Render the map with all daytime robberies
m_2

In [None]:
# Marker clusters
# Create the map
m_3 = folium.Map(location=[42.32,-71.0589], tiles='cartodbpositron', zoom_start=13)

# A marker needs both coordinates, so rows missing either one are removed in
# a single vectorised step instead of testing each row with math.isnan.
located_robberies = daytime_robberies.dropna(subset=['Lat', 'Long'])

# Add points to the map
mc = MarkerCluster()
for lat, lon in zip(located_robberies['Lat'], located_robberies['Long']):
    mc.add_child(Marker([lat, lon]))
m_3.add_child(mc)

# Display the map
m_3

In [None]:
# Base map for the hour-coloured robbery circles
m_4 = folium.Map(
    location=[42.32,-71.0589],
    tiles='cartodbpositron',
    zoom_start=12,
)

def color_producer(val):
    """Pick a circle colour for the given value.

    Args:
        val: Number compared against 12.

    Returns:
        'forestgreen' when ``val`` is at most 12, otherwise 'darkred'.
    """
    return 'forestgreen' if val <= 12 else 'darkred'

# Add a bubble to the base map.
# The three needed columns are walked in lockstep, so each row is read once
# instead of being materialised three times through .iloc[i].
robbery_points = zip(
    daytime_robberies['Lat'],
    daytime_robberies['Long'],
    daytime_robberies['HOUR'],
)
for lat, lon, hour in robbery_points:
    Circle(
        location=[lat, lon],
        radius=20,
        color=color_producer(hour)).add_to(m_4)

# Display the map
m_4

### Exercise 2



1. Plot all of the crimes that occurred in district D14 during 2018


In [None]:
# Crimes recorded in district D14 during 2018.
# `crimes` only holds the major offense groups with known coordinates, so
# every selected row can be placed on the map.
# NOTE(review): assumes YEAR was parsed as an integer column — confirm.
d14_crimes_2018 = crimes[(crimes.DISTRICT == 'D14') & (crimes.YEAR == 2018)]

m_d14 = folium.Map(location=[42.32,-71.0589], tiles='cartodbpositron', zoom_start=12)

# Markers are clustered so that a large number of points stays readable.
d14_cluster = MarkerCluster()
for lat, lon in zip(d14_crimes_2018['Lat'], d14_crimes_2018['Long']):
    d14_cluster.add_child(Marker([lat, lon]))
m_d14.add_child(d14_cluster)

# Display the map
m_d14


2. Plot all of the robberies that occurred on Friday evenings (after 18:00).



3. Plot all of the robberies that occurred on the 4th of July.



4. Plot all of the crimes that occurred on the 1st of January.
   


5. Count (separately) the number of crimes that occurred on the 1st of January, the 31st of December, the 4th of July and the 12th of May. Compare the 4 results.




In [None]:
# Write your code below

In [None]:
# Load the data
earthquakes = pd.read_csv("database.csv", encoding='latin-1')

# Preview only the first rows: display.max_rows is set to None at the top of
# the notebook, so a bare `earthquakes` would render every row of the file.
earthquakes.head()


In [None]:
# Create a map
m_2 = folium.Map(location=[42.32,-71.0589], tiles='cartodbpositron', zoom_start=1)

cnt = 0

# Add first 100 earthquakes to the map
for idx, row in earthquakes.iterrows():
    Marker([row['Latitude'], row['Longitude']]).add_to(m_2)

    cnt +=1 
    if cnt == 100:
        break

# Display the map
m_2

### Exercise 3
1. Plot all of the earthquakes that occurred between 41 and 42 degrees latitude.
2. Plot all of the earthquakes that occurred in Boston (or at most 1 degree of latitude and 1 degree of longitude away from it). How many earthquakes did Boston suffer?
3. Plot all of the earthquakes that were of magnitude greater than 7 using circles on the map. Use the magnitude to increase the circle radius (higher magnitude - bigger circle).

In [None]:
import pandas as pd
# Read the world cities table and preview its first five rows
cities = pd.read_csv(
    "worldcities.csv",
    encoding='latin-1',
)
cities.head(5)

### Exercise 4
1. Plot all of the cities in Romania using circles and increase the circle radius based on population.
2. Plot all of the earthquakes that took place at most 5 degrees of latitude and 5 degrees of longitude away from Bucharest, i.e. $|lat_{eq} - lat_{buch}| \le 5$ and $|long_{eq} - long_{buch}| \le 5$.


In [None]:
# Write your code below

### Exercise 5
Create two useful visualizations (graph plots and/or map plots) by making use of information from at least two of the datasets provided in the laboratory.

In [None]:
# Write your code below