In [1]:
# imports 
import os
import sys
import pandas as pd
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from scipy.stats import linregress
from plotly.subplots import make_subplots
import glob

In [2]:
# imports
import os
import pandas as pd
import plotly.express as px
import plotly.io as pio

# Ensure the correct renderer is used for Jupyter notebooks: this sets plotly's
# default renderer, which fig.show() uses when no renderer is passed explicitly.
# NOTE(review): per the plotly renderers docs, "iframe" appears to write each
# figure to a standalone HTML file under ./iframe_figures - confirm that
# directory is ignored by version control.
pio.renderers.default = "iframe"

# DEFINE FUNCTIONS

def plot_crime_map(df: pd.DataFrame, sample_size=10000, random_state=None):
    """Render an interactive scatter map of crime records, coloured by crime type.

    Only rows that have both a latitude and a longitude are drawn. When more
    such rows exist than ``sample_size``, a random subset of exactly
    ``sample_size`` of them is plotted instead.

    Args:
        df: Crime records. The columns "Latitude", "Longitude", "Crime type",
            "Location", "Last outcome category" and "Month" are read.
            The frame itself is not modified.
        sample_size: Upper bound on the number of plotted points. ``None``
            disables sampling and plots every row that has coordinates.
        random_state: Passed through to ``DataFrame.sample``. Supply an int to
            get the same sample (and therefore the same map) on every run; the
            default ``None`` keeps the sample unseeded.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    # A point without coordinates cannot be placed on the map.
    has_coords = df["Latitude"].notna() & df["Longitude"].notna()

    if sample_size is not None and has_coords.sum() > sample_size:
        # Zero weight keeps coordinate-less rows out of the sample, so the
        # whole sample budget goes to drawable points, without first copying
        # the (potentially very large) frame.
        plottable = df.sample(
            sample_size,
            weights=has_coords.to_numpy(dtype=float),
            random_state=random_state,
        )
    else:
        plottable = df[has_coords]

    fig = px.scatter_mapbox(
        plottable, lat="Latitude", lon="Longitude", color="Crime type", hover_name="Location",
        hover_data=["Crime type", "Last outcome category", "Month"], zoom=10, height=600,
        title="Crime locations in London"
    )
    fig.update_layout(mapbox_style="open-street-map")
    fig.show()

# MAIN SCRIPT

# Define data directory (relative to the notebook's working directory)
data_dir = '../data/met_data'

# Debug: Print the current working directory, since data_dir is resolved against it
print(f"Current working directory: {os.getcwd()}")

# Fail fast if the data directory is missing
if not os.path.exists(data_dir):
    raise FileNotFoundError(f"Data directory '{data_dir}' does not exist.")

# Collect every matching street-level CSV anywhere below data_dir
all_files = []
for root, dirs, files in os.walk(data_dir):
    for file in files:
        if file.endswith('-metropolitan-street.csv'):
            all_files.append(os.path.join(root, file))

# os.walk yields entries in arbitrary order; sorting makes the row order of
# the combined frame reproducible from run to run.
all_files.sort()

# Nothing to load: stop before pd.concat fails with a less helpful message
if not all_files:
    raise FileNotFoundError(f"No CSV files found in the directory '{data_dir}'.")

# One DataFrame per file; all_files is non-empty here, so df_list is too
df_list = [pd.read_csv(file) for file in all_files]

# Combine all dataframes into one
df_all_years = pd.concat(df_list, ignore_index=True)

# Display the map with all crimes
plot_crime_map(df=df_all_years)

# Number of records per crime type, most frequent first
print("Crime Statistics:")
crime_counts = (
    df_all_years['Crime type']
    .value_counts()
    .rename_axis('Crime type')
    .reset_index(name='Count')
)
print(crime_counts)

# Plot a bar chart of crime counts
fig = px.bar(crime_counts, x='Crime type', y='Count', title='Total Crime Counts by Type')
fig.show()


Current working directory: /Users/pvicovanberkel/PycharmProjects/JBG050_Group_8/app


Crime Statistics:
                      Crime type    Count
0          Anti-social behaviour  3705162
1   Violence and sexual offences  2352790
2                    Other theft  1512721
3                  Vehicle crime  1317048
4                       Burglary   966637
5      Criminal damage and arson   712100
6                   Public order   521379
7                    Shoplifting   519159
8                          Drugs   511607
9                    Other crime   486136
10         Theft from the person   484486
11                       Robbery   390488
12                 Violent crime   339718
13                 Bicycle theft   206525
14         Possession of weapons    54811
15   Public disorder and weapons    50433
