## Importing libraries

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import plotly.offline as pyo

## Loading datasets

In [2]:
# Read the pre-COVID crime records and preview the first five rows
pre_covid_data = pd.read_csv(filepath_or_buffer='pre_covid_data.csv')
pre_covid_data.head(n=5)

Unnamed: 0,DR_NO,Date Rptd,DATE OCC,TIME OCC,AREA,AREA NAME,Rpt Dist No,Part 1-2,Crm Cd,Crm Cd Desc,...,Status Desc,Crm Cd 1,Crm Cd 2,Crm Cd 3,Crm Cd 4,LOCATION,Cross Street,LAT,LON,YEAR
0,180406341,02/22/2018 12:00:00 AM,2018-02-21,1500,4,Hollenbeck,421,1,330,BURGLARY FROM VEHICLE,...,Invest Cont,330.0,,,,2100 N BROADWAY,,34.0732,-118.2189,2018
1,191907191,03/11/2019 12:00:00 AM,2019-03-08,1500,19,Mission,1918,1,510,VEHICLE - STOLEN,...,Invest Cont,510.0,,,,13200 FOOTHILL BL,,34.2991,-118.4211,2019
2,191918746,11/01/2019 12:00:00 AM,2019-10-31,1900,19,Mission,1972,1,510,VEHICLE - STOLEN,...,Adult Arrest,510.0,,,,9000 BURNET AV,,34.2337,-118.4632,2019
3,181908455,03/27/2018 12:00:00 AM,2018-03-26,1900,19,Mission,1935,1,330,BURGLARY FROM VEHICLE,...,Invest Cont,330.0,,,,11400 INDIAN WELLS RD,,34.2752,-118.4603,2018
4,181706416,02/20/2018 12:00:00 AM,2018-02-20,815,17,Devonshire,1772,1,330,BURGLARY FROM VEHICLE,...,Invest Cont,330.0,,,,9400 PENFIELD AV,,34.2403,-118.5696,2018


In [3]:
# Read the COVID-period crime records and preview the first five rows
covid_data = pd.read_csv(filepath_or_buffer='covid_data.csv')
covid_data.head(n=5)

Unnamed: 0,DR_NO,Date Rptd,DATE OCC,TIME OCC,AREA,AREA NAME,Rpt Dist No,Part 1-2,Crm Cd,Crm Cd Desc,...,Status Desc,Crm Cd 1,Crm Cd 2,Crm Cd 3,Crm Cd 4,LOCATION,Cross Street,LAT,LON,YEAR
0,190326475,03/01/2020 12:00:00 AM,2020-03-01,2130,7,Wilshire,784,1,510,VEHICLE - STOLEN,...,Adult Arrest,510.0,998.0,,,1900 S LONGWOOD AV,,34.0375,-118.3506,2020
1,200106753,02/09/2020 12:00:00 AM,2020-02-08,1800,1,Central,182,1,330,BURGLARY FROM VEHICLE,...,Invest Cont,330.0,998.0,,,1000 S FLOWER ST,,34.0444,-118.2628,2020
2,200320258,11/11/2020 12:00:00 AM,2020-11-04,1700,3,Southwest,356,1,480,BIKE - STOLEN,...,Invest Cont,480.0,,,,1400 W 37TH ST,,34.021,-118.3002,2020
3,200907217,05/10/2023 12:00:00 AM,2020-03-10,2037,9,Van Nuys,964,1,343,SHOPLIFTING-GRAND THEFT ($950.01 & OVER),...,Invest Cont,343.0,,,,14000 RIVERSIDE DR,,34.1576,-118.4387,2020
4,220614831,08/18/2022 12:00:00 AM,2020-08-17,1200,6,Hollywood,666,2,354,THEFT OF IDENTITY,...,Invest Cont,354.0,,,,1900 TRANSIENT,,34.0944,-118.3277,2020


In [4]:
# Read the post-COVID crime records and preview the first five rows
post_covid_data = pd.read_csv(filepath_or_buffer='post_covid_data.csv')
post_covid_data.head(n=5)

Unnamed: 0,DR_NO,Date Rptd,DATE OCC,TIME OCC,AREA,AREA NAME,Rpt Dist No,Part 1-2,Crm Cd,Crm Cd Desc,...,Status Desc,Crm Cd 1,Crm Cd 2,Crm Cd 3,Crm Cd 4,LOCATION,Cross Street,LAT,LON,YEAR
0,211820050,11/15/2021 12:00:00 AM,2021-11-13,900,18,Southeast,1802,1,510,VEHICLE - STOLEN,...,Invest Cont,510.0,,,,8800 S BROADWAY,,33.9574,-118.2782,2021
1,212116726,11/21/2021 12:00:00 AM,2021-11-20,1912,21,Topanga,2129,1,510,VEHICLE - STOLEN,...,Invest Cont,510.0,,,,7900 QUAKERTOWN AV,,34.214,-118.5696,2021
2,211909242,05/30/2021 12:00:00 AM,2021-05-30,100,19,Mission,1963,1,510,VEHICLE - STOLEN,...,Invest Cont,510.0,,,,9800 WOODMAN AV,,34.2454,-118.4505,2021
3,211916536,12/09/2021 12:00:00 AM,2021-10-25,2000,19,Mission,1958,1,510,VEHICLE - STOLEN,...,Invest Cont,510.0,,,,9700 WOODMAN AV,,34.2464,-118.4477,2021
4,211211485,05/03/2021 12:00:00 AM,2021-05-03,450,12,77th Street,1257,1,510,VEHICLE - STOLEN,...,Invest Cont,510.0,,,,78TH,FIGUEROA,33.9688,-118.2827,2021


## Combining and saving the DataFrames

In [5]:
# Stack the three period frames into one. ignore_index=True gives the result a
# fresh 0..N-1 index; without it each source frame keeps its own row labels,
# so the combined frame would contain repeated labels and label-based
# selection or assignment on it would be ambiguous.
combined_df = pd.concat(
    [pre_covid_data, covid_data, post_covid_data],
    ignore_index=True,
)

# Save the combined DataFrame to a new CSV file (the row index is not written)
combined_df.to_csv('combined_file.csv', index=False)

## Data Processing

In [6]:
# Map an hour of the day to a named time-of-day bucket
def categorize_time(hour):
    """Return the time-of-day label for ``hour``.

    Buckets are half-open intervals:
    [0, 6) -> 'Night', [6, 12) -> 'Morning', [12, 18) -> 'Afternoon'.
    Any value that falls in none of them (18 and above, negatives, NaN)
    is labelled 'Evening'.
    """
    buckets = (
        (0, 6, 'Night'),
        (6, 12, 'Morning'),
        (12, 18, 'Afternoon'),
    )
    for lower, upper, label in buckets:
        if lower <= hour < upper:
            return label
    # Fallback for everything outside the three explicit ranges
    return 'Evening'

# TIME OCC values look like HHMM integers (e.g. 1500, 815 in the previews above),
# so integer division by 100 yields the hour, which is then bucketed.
combined_df['Time Category'] = (
    combined_df['TIME OCC']
    .astype(int)
    .floordiv(100)
    .map(categorize_time)
)

# Every area name plotted below, in the order the traces are added
all_areas = [
    'Wilshire', 'Central', 'Southwest', 'Van Nuys', 'Hollywood',
    'Southeast', 'Newton', 'Mission', 'Rampart', 'West Valley',
    'West LA', 'Olympic', 'Hollenbeck', 'Topanga', 'Northeast',
    '77th Street', 'Pacific', 'N Hollywood', 'Harbor', 'Foothill',
    'Devonshire',
]

# Number of records per (area, time category) pair, as a flat table with a 'Counts' column
time_counts = (
    combined_df
    .groupby(['AREA NAME', 'Time Category'])
    .size()
    .rename('Counts')
    .reset_index()
)

## Plotting the data

In [7]:
# Time-of-day order used for the angular axis. The grouped counts come back
# sorted alphabetically (Afternoon, Evening, Morning, Night), which would place
# the periods around the chart out of sequence, so the order is set explicitly.
time_order = ['Morning', 'Afternoon', 'Evening', 'Night']

# Plot setup
fig = go.Figure()

# Add one polar trace per area
for area in all_areas:
    # Counts for this area, arranged in time-of-day order. A period with no
    # recorded rows is filled with 0 so every trace has the same four vertices.
    area_counts = (
        time_counts[time_counts['AREA NAME'] == area]
        .set_index('Time Category')['Counts']
        .reindex(time_order, fill_value=0)
    )
    fig.add_trace(go.Scatterpolar(
        r=area_counts,
        theta=time_order,
        name=area,
        visible=True  # Set to "legendonly" to hide by default
    ))

# Dropdown buttons: each one sets the per-trace visibility list
# (one entry per trace, in the same order as all_areas)
buttons = [dict(label="Select All",
                method="update",
                args=[{"visible": [True]*len(all_areas)}]),
           dict(label="Deselect All",
                method="update",
                args=[{"visible": [False]*len(all_areas)}])]

# Add buttons for individual areas: only the matching trace stays visible
for area in all_areas:
    buttons.append(dict(label=area,
                        method="update",
                        args=[{"visible": [area == area_name for area_name in all_areas]}]))

fig.update_layout(
    title="CRIME BY TIME OF OCCURRENCE ACROSS DIFFERENT AREAS",
    title_font=dict(size=24, color="navy", family="Arial"),  # Large navy title
    xaxis_title="Time Period",
    yaxis_title="Number of Occurrences",
    xaxis=dict(
        tickfont=dict(size=14, family="Arial, sans-serif"),
        title_font=dict(size=18, family="Arial, sans-serif"),
    ),
    yaxis=dict(
        tickfont=dict(size=14, family="Arial, sans-serif"),
        title_font=dict(size=18, family="Arial, sans-serif"),
    ),
    font=dict(family="Arial"),  # Default font for the figure's text
    plot_bgcolor="LIGHTYELLOW",

    paper_bgcolor="lightgrey",  # Background colour of the area around the plot
    updatemenus=[dict(active=0,
                      buttons=buttons,
                      x=2,
                      xanchor="left",
                      y=1,
                      yanchor="top")],
    polar=dict(
        radialaxis=dict(visible=True),
        # Pin the category order so it does not depend on trace data order
        angularaxis=dict(categoryorder="array", categoryarray=time_order)
    )
)

# Fill the inside of each area's polygon
fig.update_traces(fill='toself')
fig.show()


Moving from the spatial to the temporal, Jack uses a spider plot to delve into the times when crimes are most likely to occur. This vividly colored, comprehensive plot shows how crimes are distributed over four periods of the day—Morning, Afternoon, Evening, and Night—for each police area, such as Wilshire, Central, and Hollywood, among others.

From the spider plot, Jack observes several key insights:

* **Evening Peak:** The majority of areas experience a significant uptick in crime during the evening hours, which suggests that increased patrols and community engagement initiatives during these hours might be effective.
* **Area-Specific Trends:** While some areas like Central and Hollywood show a pronounced rise in evening crimes, others such as Van Nuys and Newton exhibit a more balanced distribution throughout the day. This variability necessitates area-specific policing strategies.
* **Comparative Analysis:** The visual comparison across areas helps Jack evaluate the effectiveness of current policing strategies and understand where additional resources or new tactics are needed.

## Saving the figure as an HTML file

In [8]:
# Write the figure to plot_2.html; auto_open=False keeps it from being opened in a browser
pyo.plot(
    fig,
    auto_open=False,
    filename='plot_2.html',
)

'plot_2.html'