In [9]:
import requests
import pandas as pd
import time

# Load crime data
df = pd.read_csv("NYPD_Complaint_Data_YTD.csv")

# Keep only necessary columns (use the correct column names from the CSV)
df_filtered = df[['lat_lon.latitude', 'lat_lon.longitude']].dropna().head(10)  # Limiting to 10 rows to avoid API overload

# All relevant OSM place types
# place_types = [
#     "hospital", "clinic", "pharmacy", "retirement_home", "dormitory", "apartment", 
#     "park", "place_of_worship", "library", "bus_station", "subway_entrance", "train_station", 
#     "school", "college", "university", "bar", "nightclub", "restaurant", "cafe", "mall", 
#     "supermarket", "police", "fire_station", "embassy", "shelter", "social_facility"
# ]
place_types = [
    "hospital", "clinic", "pharmacy"
]

# Function to fetch nearby places using OpenStreetMap's Nominatim API
def get_osm_places(lat, lon, place_type, radius=500):
    url = f"https://nominatim.openstreetmap.org/search?format=json&q={place_type}&lat={lat}&lon={lon}&radius={radius}"
    
    headers = {"User-Agent": "crime-analysis-bot"}  # Required by Nominatim's usage policy
    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()  # Raise an error for bad status codes
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data for {place_type} at ({lat}, {lon}): {e}")
        return []  # Return empty list if API fails
    
    data = response.json()
    
    places = []
    for place in data:
        places.append({
            "name": place.get("display_name", "Unknown"),
            "lat": place.get("lat", None),
            "lon": place.get("lon", None),
            "type": place_type
        })
    
    return places

# Iterate over crime locations and fetch all place types
osm_places = []
for index, row in df_filtered.iterrows():
    lat, lon = row["lat_lon.latitude"], row["lat_lon.longitude"]
    
    for place_type in place_types:
        places = get_osm_places(lat, lon, place_type=place_type)
        for p in places:
            p["crime_lat"] = lat
            p["crime_lon"] = lon
            osm_places.append(p)
    
    time.sleep(1)  # Avoid rate limiting (1 request per second)

# Convert to DataFrame
df_osm_places = pd.DataFrame(osm_places)

# Save to CSV
df_osm_places.to_csv("crime_nearby_establishments.csv", index=False)

print("✅ Done! Check 'crime_nearby_establishments.csv' for results.")

✅ Done! Check 'crime_nearby_establishments.csv' for results.


# Trying to get the places to be in new york

In [None]:
import requests
import pandas as pd
import time
from geopy.distance import geodesic

# Load crime data
df = pd.read_csv("NYPD_Complaint_Data_YTD.csv")

# Filter for NYC coordinates (expanded bounding box)
nyc_min_lat, nyc_max_lat = 40.4774, 40.9176
nyc_min_lon, nyc_max_lon = -74.2591, -73.7004

# Keep only NYC locations
df_filtered = df[
    (df['lat_lon.latitude'].between(nyc_min_lat, nyc_max_lat)) & 
    (df['lat_lon.longitude'].between(nyc_min_lon, nyc_max_lon))
].dropna().head(10)  # Still limiting to 10 rows for testing

# NYC-specific place types with OSM tags
place_categories = {
    "hospital": {"amenity": "hospital"},
    "clinic": {"amenity": "clinic"},
    "school": {"amenity": "school"},
    "police": {"amenity": "police"},
    "pharmacy": {"amenity": "pharmacy"}
}

def get_nearby_nyc_places(lat, lon, place_type, tags, radius=10000, max_distance_km=15):
    print("Trying to find")
    """
    Improved function to find nearby places using different query methods
    """
    # Method 1: Standard nearby search with tags
    url = f"https://nominatim.openstreetmap.org/search?format=json&lat={lat}&lon={lon}&radius={radius}"
    
    # Add tags to query
    for key, value in tags.items():
        url += f"&{key}={value}"
    
    headers = {
        "User-Agent": "nyc-crime-analysis-v2",
        "Accept-Language": "en-US,en;q=0.9"
    }
    
    try:
        print(f"Querying for {place_type} at ({lat:.6f}, {lon:.6f})...")
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        time.sleep(1.1)  # Slightly more than 1 second to be safe
        
        data = response.json()
        print(f"Found {len(data)} potential {place_type} results")
        
        places = []
        crime_location = (lat, lon)
        
        for place in data:
            try:
                place_lat = float(place.get('lat'))
                place_lon = float(place.get('lon'))
                place_location = (place_lat, place_lon)
                
                # Verify it's within our max distance
                distance = geodesic(crime_location, place_location).meters
                if distance <= max_distance_km * 1000:
                    places.append({
                        "name": place.get("display_name", "Unknown").split(",")[0],
                        "lat": place_lat,
                        "lon": place_lon,
                        "type": place_type,
                        "distance_meters": round(distance, 1),
                        "osm_type": place.get("type", "node")
                    })
            except (ValueError, AttributeError) as e:
                print(f"Skipping invalid place data: {e}")
                continue
        
        # If no results, try alternative query method
        if not places:
            print(f"No results found with standard query, trying alternative method...")
            return try_alternative_query(lat, lon, place_type, tags, max_distance_km)
        
        return places
    
    except Exception as e:
        print(f"Error for {place_type} at ({lat},{lon}): {e}")
        return []

def try_alternative_query(lat, lon, place_type, tags, max_distance_km):
    """Alternative query method using bounding box search"""
    bbox_offset = 0.01  # ~1km bounding box
    bbox = (
        lon - bbox_offset,  # min lon
        lat - bbox_offset,  # min lat
        lon + bbox_offset,  # max lon
        lat + bbox_offset   # max lat
    )
    
    url = f"https://nominatim.openstreetmap.org/search?format=json&q={place_type}&bounded=1&viewbox={','.join(map(str, bbox))}"
    
    headers = {"User-Agent": "nyc-crime-analysis-alt"}
    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        time.sleep(1.1)
        
        places = []
        crime_location = (lat, lon)
        
        for place in response.json():
            place_lat = float(place.get('lat'))
            place_lon = float(place.get('lon'))
            place_location = (place_lat, place_lon)
            
            distance = geodesic(crime_location, place_location).meters
            if distance <= max_distance_km * 1000:
                places.append({
                    "name": place.get("display_name", "Unknown").split(",")[0],
                    "lat": place_lat,
                    "lon": place_lon,
                    "type": place_type,
                    "distance_meters": round(distance, 1),
                    "osm_type": place.get("type", "node"),
                    "query_method": "alternative"
                })
        
        print(f"Alternative method found {len(places)} {place_type} results")
        return places
    
    except Exception as e:
        print(f"Alternative query failed: {e}")
        return []

# Collect all nearby places
osm_places = []
for index, row in df_filtered.iterrows():
    lat, lon = row["lat_lon.latitude"], row["lat_lon.longitude"]
    print(f"\nProcessing crime location {index+1} at ({lat:.6f}, {lon:.6f})")
    
    for place_type, tags in place_categories.items():
        print("trying to try")
        places = get_nearby_nyc_places(lat, lon, place_type, tags)
        for p in places:
            p["crime_lat"] = lat
            p["crime_lon"] = lon
            osm_places.append(p)
    
    print(f"Finished processing location {index+1}")

# Create DataFrame and save
if osm_places:
    df_osm_places = pd.DataFrame(osm_places)
    # Sort by distance
    df_osm_places = df_osm_places.sort_values(by=['crime_lat', 'crime_lon', 'distance_meters'])
    df_osm_places.to_csv("nyc_crime_nearby_establishments.csv", index=False)
    print(f"\n✅ Saved {len(df_osm_places)} nearby places to 'nyc_crime_nearby_establishments.csv'")
    print("Sample results:")
    print(df_osm_places.head())
else:
    print("\n❌ No nearby places found. Possible solutions:")
    print("1. Try increasing the search radius (currently 1000m)")
    print("2. Verify your input coordinates are correct")
    print("3. Check if Nominatim API is responding (try the query URL in browser)")
    print("4. Try during off-peak hours (Nominatim has usage limits)")


❌ No nearby places found. Possible solutions:
1. Try increasing the search radius (currently 1000m)
2. Verify your input coordinates are correct
3. Check if Nominatim API is responding (try the query URL in browser)
4. Try during off-peak hours (Nominatim has usage limits)


# Trying again from scratch

In [1]:
import pandas as pd
import requests
import time
from tqdm import tqdm

# Load your dataset
df = pd.read_csv('NYPD_Complaint_Data_YTD.csv')

# Overpass API (OpenStreetMap) configuration
OVERPASS_URL = "http://overpass-api.de/api/interpreter"
RADIUS = 100  # Search radius in meters

# Function to get nearby places from OpenStreetMap
def get_osm_nearby(latitude, longitude, place_type):
    # Map our categories to OSM tags
    osm_tags = {
        'hospital': 'amenity=hospital',
        'school': 'amenity=school',
        'restaurant': 'amenity=restaurant'
    }
    
    query = f"""
    [out:json];
    (
      node[{osm_tags[place_type]}](around:{RADIUS},{latitude},{longitude});
      way[{osm_tags[place_type]}](around:{RADIUS},{latitude},{longitude});
      relation[{osm_tags[place_type]}](around:{RADIUS},{latitude},{longitude});
    );
    out center;
    """
    
    try:
        response = requests.get(OVERPASS_URL, params={'data': query})
        data = response.json()
        
        places = []
        for element in data.get('elements', []):
            name = element.get('tags', {}).get('name', 'Unnamed')
            places.append({
                'name': name,
                'type': place_type
            })
        return places[:3]  # Return up to 3 results
    except Exception as e:
        print(f"Error fetching {place_type} data: {e}")
        return []

# Process each row in the dataset
results = []

for index, row in tqdm(df.iterrows(), total=len(df)):
    if pd.notna(row['latitude']) and pd.notna(row['longitude']):
        latitude = row['latitude']
        longitude = row['longitude']
        cmplnt_num = row['cmplnt_num']
        
        for place_type in ['hospital', 'school', 'restaurant']:
            places = get_osm_nearby(latitude, longitude, place_type)
            for place in places:
                results.append({
                    'cmplnt_num': cmplnt_num,
                    'latitude': latitude,
                    'longitude': longitude,
                    'place_name': place['name'],
                    'place_type': place['type']
                })
        
        # Respect API rate limits
        time.sleep(1)  # Overpass API has stricter rate limits

# Convert results to DataFrame
results_df = pd.DataFrame(results)

# Save to new CSV file
results_df.to_csv('NYPD_Complaints_with_OSM_Places.csv', index=False)

print("Processing complete. Results saved to NYPD_Complaints_with_OSM_Places.csv")

  0%|          | 2/33952 [00:30<146:07:33, 15.49s/it]


KeyboardInterrupt: 

In [4]:
import requests
import pandas as pd
import time

# Load crime data
df = pd.read_csv("NYPD_Complaint_Data_YTD.csv")

# Keep only necessary columns (use the correct column names from the CSV)
df_filtered = df[['lat_lon.latitude', 'lat_lon.longitude']].dropna().head(10)  # Limiting to 10 rows to avoid API overload

# Reduced list of relevant OSM place types for testing
place_types = [
    "hospital", "school", "police"  # Only 3 place types for faster testing
]

# Function to fetch nearby places using OpenStreetMap's Nominatim API
def get_osm_places(lat, lon, place_type, radius=500):
    url = f"https://nominatim.openstreetmap.org/search?format=json&q={place_type}&lat={lat}&lon={lon}&radius={radius}"
    
    headers = {"User-Agent": "crime-analysis-bot"}  # Required by Nominatim's usage policy
    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()  # Raise an error for bad status codes
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data for {place_type} at ({lat}, {lon}): {e}")
        return []  # Return empty list if API fails
    
    data = response.json()
    
    places = []
    for place in data:
        places.append({
            "name": place.get("display_name", "Unknown"),
            "lat": place.get("lat", None),
            "lon": place.get("lon", None),
            "type": place_type
        })
    
    return places

# Iterate over crime locations and fetch only a few place types
osm_places = []
for index, row in df_filtered.iterrows():
    lat, lon = row["lat_lon.latitude"], row["lat_lon.longitude"]
    
    for place_type in place_types:
        places = get_osm_places(lat, lon, place_type=place_type)
        for p in places:
            p["crime_lat"] = lat
            p["crime_lon"] = lon
            osm_places.append(p)
    
    time.sleep(1)  # Avoid rate limiting (1 request per second)

# Convert to DataFrame
df_osm_places = pd.DataFrame(osm_places)

# Save to CSV
df_osm_places.to_csv("crime_nearby_establishments.csv", index=False)

print("✅ Done! Check 'crime_nearby_establishments.csv' for results.")


✅ Done! Check 'crime_nearby_establishments.csv' for results.


In [2]:
import pandas as pd
import folium

# Read the CSV file
df = pd.read_csv('crime_nearby_establishments.csv')

# Get the crime location from the first row (assuming it's the same for all)
crime_lat = df.iloc[0]['crime_lat']
crime_lon = df.iloc[0]['crime_lon']

# Create a map centered on the crime location
m = folium.Map(location=[crime_lat, crime_lon], zoom_start=2)

# Define color mapping for different types
type_colors = {
    'hospital': 'green',
    'school': 'blue',
    'police': 'orange'
}

# Add markers for each establishment
for idx, row in df.iterrows():
    folium.Marker(
        location=[row['lat'], row['lon']],
        popup=f"<b>{row['type'].title()}</b><br>{row['name']}",
        icon=folium.Icon(color=type_colors.get(row['type'], 'gray'))
    ).add_to(m)

# Add crime location marker (red)
folium.Marker(
    location=[crime_lat, crime_lon],
    popup="<b>Crime Location</b>",
    icon=folium.Icon(color='red')
).add_to(m)

# Add layer control to toggle different types
folium.LayerControl().add_to(m)

# Save to HTML file
m.save("crime_establishments_map.html")

In [3]:
import pandas as pd
import folium

# Read the CSV file
df = pd.read_csv('crime_nearby_establishments.csv')

# Get crime location
crime_lat = df.iloc[0]['crime_lat']
crime_lon = df.iloc[0]['crime_lon']

# Create map
m = folium.Map(location=[crime_lat, crime_lon], zoom_start=2)

# Create feature groups for each type
feature_groups = {
    'hospital': folium.FeatureGroup(name='Hospitals'),
    'school': folium.FeatureGroup(name='Schools'),
    'police': folium.FeatureGroup(name='Police Stations')
}

# Color mapping
type_colors = {
    'hospital': 'green',
    'school': 'blue',
    'police': 'orange'
}

# Add markers to appropriate feature groups
for idx, row in df.iterrows():
    establishment_type = row['type']
    if establishment_type in feature_groups:
        folium.Marker(
            location=[row['lat'], row['lon']],
            popup=f"<b>{row['type'].title()}</b><br>{row['name']}",
            icon=folium.Icon(color=type_colors.get(establishment_type, 'gray'))
        ).add_to(feature_groups[establishment_type])

# Add all feature groups to map
for group in feature_groups.values():
    group.add_to(m)

# Add crime location
folium.Marker(
    location=[crime_lat, crime_lon],
    popup="<b>Crime Location</b>",
    icon=folium.Icon(color='red', icon='exclamation-triangle')
).add_to(m)

# Add layer control
folium.LayerControl().add_to(m)

# Save map
m.save("enhanced_crime_establishments_map.html")

In [4]:
import pandas as pd
import folium
from folium import plugins

# Read the CSV file
df = pd.read_csv('crime_nearby_establishments.csv')

# Get crime location from the first row
crime_lat = df.iloc[0]['crime_lat']
crime_lon = df.iloc[0]['crime_lon']
crime_location = [crime_lat, crime_lon]

# Create map with detailed OpenStreetMap tiles
m = folium.Map(
    location=crime_location,
    zoom_start=12,  # Start more zoomed in for street details
    tiles='OpenStreetMap',  # Detailed street map
    control_scale=True
)

# Add optional satellite imagery layer
folium.TileLayer(
    tiles='https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}',
    attr='Esri World Imagery',
    name='Satellite View',
    overlay=False
).add_to(m)

# Custom icon colors
icon_colors = {
    'hospital': 'green',
    'school': 'blue',
    'police': 'orange'
}

# Custom icons (using Font Awesome icons)
icon_types = {
    'hospital': 'medkit',
    'school': 'graduation-cap',
    'police': 'shield'
}

# Create feature groups for each type
feature_groups = {
    'hospital': folium.FeatureGroup(name='🏥 Hospitals'),
    'school': folium.FeatureGroup(name='🏫 Schools'),
    'police': folium.FeatureGroup(name='👮 Police Stations')
}

# Create a feature group for connection lines
connection_lines = folium.FeatureGroup(name='🔗 Connections to Crime')

# Add markers and connection lines
for idx, row in df.iterrows():
    establishment_type = row['type']
    establishment_loc = [row['lat'], row['lon']]
    
    if establishment_type in feature_groups:
        # Create custom icon
        icon = folium.Icon(
            color=icon_colors.get(establishment_type, 'gray'),
            icon=icon_types.get(establishment_type, 'info-sign'),
            prefix='fa'  # Font Awesome prefix
        )
        
        # Add marker
        folium.Marker(
            location=establishment_loc,
            popup=f"""
                <b>{row['type'].title()}</b><br>
                {row['name']}<br>
                <small>Lat: {row['lat']:.4f}, Lon: {row['lon']:.4f}</small>
            """,
            icon=icon
        ).add_to(feature_groups[establishment_type])
        
        # Add connection line
        folium.PolyLine(
            locations=[establishment_loc, crime_location],
            color=icon_colors.get(establishment_type, 'gray'),
            weight=1.5,
            opacity=0.7,
            popup=f"Connection to {row['type']}",
            tooltip=f"{row['type'].title()} to Crime Site"
        ).add_to(connection_lines)

# Add crime location marker with custom icon
folium.Marker(
    location=crime_location,
    popup="<b>Crime Location</b>",
    icon=folium.Icon(color='red', icon='exclamation-triangle', prefix='fa')
).add_to(m)

# Add all feature groups to map
for group in feature_groups.values():
    group.add_to(m)
connection_lines.add_to(m)

# Add layer control
folium.LayerControl(collapsed=False).add_to(m)

# Add minimap for navigation
minimap = plugins.MiniMap()
m.add_child(minimap)

# Add measure control
plugins.MeasureControl(position='bottomleft').add_to(m)

# Add fullscreen button
plugins.Fullscreen(position='topright').add_to(m)

# Save map
m.save("detailed_crime_analysis_map.html")

print("Map saved as 'detailed_crime_analysis_map.html'")

Map saved as 'detailed_crime_analysis_map.html'
