# Dependencies

In [None]:
import pkg_resources
import sys
import subprocess

# List of required packages (use package names as recognized by pip)
required = {
    'geopandas',
    'osmnx',
    'contextily',
    'libpysal',
    'esda',
    'pointpats',
    'matplotlib',
    'seaborn',
    'scikit-learn'
}

# Get the set of installed packages
installed = {pkg.key for pkg in pkg_resources.working_set}

# Determine which packages are missing
missing = required - installed

if missing:
    print(f"Installing missing packages: {missing}")
    subprocess.check_call([sys.executable, "-m", "pip", "install", *missing])
else:
    print("All required packages are already installed.")

# Python imports

In [4]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import osmnx as ox
from shapely.geometry import Point
from libpysal.weights import Queen
from esda import Moran, Moran_Local
from sklearn.cluster import DBSCAN, KMeans
import os

# Data collection

In [6]:
# Define the local file path and the API URL
data_path = "./data/NYPD_Complaint_Data_Historic_20250403.csv"
url = "https://data.cityofnewyork.us/api/views/qgea-i56i/rows.csv?accessType=DOWNLOAD"

# Specify the columns to load
cols = ["CMPLNT_FR_DT", "LAW_CAT_CD", "BORO_NM", "ADDR_PCT_CD", "Latitude", "Longitude"]

# Check if the file exists locally
if os.path.exists(data_path):
    print("Loading data from local file...")
    crime_df = pd.read_csv(data_path, usecols=cols, parse_dates=["CMPLNT_FR_DT"])
else:
    print("Downloading data from API...")
    crime_df = pd.read_csv(url, usecols=cols, parse_dates=["CMPLNT_FR_DT"])
    # Save the fetched data to a local CSV file for future use
    crime_df.to_csv(data_path, index=False)

# Filter for year 2019
crime_df = crime_df[crime_df["CMPLNT_FR_DT"].dt.year == 2019]

# Drop records with missing or invalid coordinates
crime_df = crime_df.dropna(subset=["Latitude", "Longitude"])
crime_df = crime_df[crime_df["Latitude"] != 0]

print(f"Total records in 2019: {len(crime_df)}")
crime_df.head(3)


Loading data from local file...


AttributeError: Can only use .dt accessor with datetimelike values