# Dependencies

In [None]:
import pkg_resources
import sys
import subprocess

# Make sure every third-party package this notebook needs is installed in the
# environment of the running kernel, installing whatever is absent.
#
# Installed distributions are discovered with the standard-library
# `importlib.metadata` instead of `pkg_resources`: the latter is deprecated and
# is provided by setuptools rather than by Python itself, so it may not be
# importable in every environment.
# NOTE(review): the `import pkg_resources` line at the top of this cell is no
# longer used by the code below and can be dropped.
import importlib
import re
from importlib import metadata as importlib_metadata


def _canonical_name(name):
    """Return the PEP 503 normalized form of a distribution name.

    Runs of ``-``, ``_`` and ``.`` collapse to a single ``-`` and the result is
    lower-cased, so ``scikit_learn`` and ``Scikit-Learn`` compare equal.
    """
    return re.sub(r"[-_.]+", "-", name).lower()


# List of required packages (use package names as recognized by pip)
required = {
    'geopandas',
    'osmnx',
    'contextily',
    'libpysal',
    'esda',
    'pointpats',
    'matplotlib',
    'seaborn',
    'scikit-learn'
}

# Get the set of installed packages (normalized names)
installed = set()
for dist in importlib_metadata.distributions():
    dist_name = dist.metadata["Name"]
    if dist_name:  # skip distributions whose metadata carries no name
        installed.add(_canonical_name(dist_name))

# Determine which packages are missing; compare normalized names on both sides
# but keep the spelling from `required` for the pip command line.
missing = {pkg for pkg in required if _canonical_name(pkg) not in installed}

if missing:
    print(f"Installing missing packages: {missing}")
    # Use the kernel's own interpreter so packages land in the environment the
    # notebook actually runs in; sorted() keeps the command line deterministic.
    subprocess.check_call([sys.executable, "-m", "pip", "install", *sorted(missing)])
    # Let the import system notice the freshly installed packages so the
    # following cells can import them without restarting the kernel.
    importlib.invalidate_caches()
else:
    print("All required packages are already installed.")

# Python imports

In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import osmnx as ox
from shapely.geometry import Point
from libpysal.weights import Queen
from esda import Moran, Moran_Local
from sklearn.cluster import DBSCAN, KMeans




Defaulting to user installation because normal site-packages is not writeable
Collecting geopandas
  Downloading geopandas-1.0.1-py3-none-any.whl.metadata (2.2 kB)
Collecting osmnx
  Downloading osmnx-2.0.2-py3-none-any.whl.metadata (4.9 kB)
Collecting contextily
  Downloading contextily-1.6.2-py3-none-any.whl.metadata (2.9 kB)
Collecting libpysal
  Downloading libpysal-4.13.0-py3-none-any.whl.metadata (4.8 kB)
Collecting esda
  Downloading esda-2.7.0-py3-none-any.whl.metadata (2.0 kB)
Collecting pointpats
  Downloading pointpats-2.5.1-py3-none-any.whl.metadata (4.7 kB)
Collecting pyogrio>=0.7.2 (from geopandas)
  Downloading pyogrio-0.10.0-cp312-cp312-win_amd64.whl.metadata (5.6 kB)
Collecting pyproj>=3.3.0 (from geopandas)
  Downloading pyproj-3.7.1-cp312-cp312-win_amd64.whl.metadata (31 kB)
Collecting shapely>=2.0.0 (from geopandas)
  Downloading shapely-2.1.0-cp312-cp312-win_amd64.whl.metadata (7.0 kB)
Collecting geopy (from contextily)
  Downloading geopy-2.4.1-py3-none-any.whl.me

# Data collection

In [3]:
# Load NYPD Complaint Data (Historic) and keep only complaints from 2019 that
# have usable coordinates. The result is bound to `crime_df`.
url = "https://data.cityofnewyork.us/api/views/qgea-i56i/rows.csv?accessType=DOWNLOAD"
# Only load needed columns to save memory
cols = ["CMPLNT_FR_DT", "LAW_CAT_CD", "BORO_NM", "ADDR_PCT_CD", "Latitude", "Longitude"]
CRIME_YEAR = 2019
CHUNK_ROWS = 500_000  # rows parsed per chunk; bounds peak memory during the download

# Stream the CSV in chunks and filter each chunk immediately, so only the rows
# for CRIME_YEAR are ever kept in memory instead of the whole historic file.
kept_chunks = []
with pd.read_csv(url, usecols=cols, chunksize=CHUNK_ROWS) as reader:
    for chunk in reader:
        # Parse dates explicitly with errors="coerce": with `parse_dates=`, a
        # single unparseable or out-of-range value makes pandas return the
        # column unaltered as object dtype, and `.dt` below would then raise.
        chunk["CMPLNT_FR_DT"] = pd.to_datetime(chunk["CMPLNT_FR_DT"], errors="coerce")
        in_year = chunk["CMPLNT_FR_DT"].dt.year == CRIME_YEAR
        kept_chunks.append(chunk[in_year])

# Chunks keep their original row labels, so the index matches a one-shot read.
crime_df = pd.concat(kept_chunks)

# Drop records with missing or invalid coordinates (a zero in either
# coordinate is treated as a placeholder, not a real location)
crime_df = crime_df.dropna(subset=["Latitude", "Longitude"])
crime_df = crime_df[(crime_df["Latitude"] != 0) & (crime_df["Longitude"] != 0)]
print(f"Total records in {CRIME_YEAR}: {len(crime_df)}")
crime_df.head(3)


KeyboardInterrupt: 