In [1]:
!pip install folium requests

Collecting folium
  Downloading folium-0.20.0-py2.py3-none-any.whl.metadata (4.2 kB)
Collecting branca>=0.6.0 (from folium)
  Downloading branca-0.8.1-py3-none-any.whl.metadata (1.5 kB)
Downloading folium-0.20.0-py2.py3-none-any.whl (113 kB)
Downloading branca-0.8.1-py3-none-any.whl (26 kB)
Installing collected packages: branca, folium
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [folium]
[1A[2KSuccessfully installed branca-0.8.1 folium-0.20.0


In [2]:
import pandas as pd
import numpy as np
import folium
import requests
import seaborn as sns

In [3]:
# Set this to the crime-records endpoint before running; with an empty URL
# requests.get() raises MissingSchema.
api_url = ""
# A timeout keeps the cell from hanging forever on an unresponsive server.
response = requests.get(api_url, timeout=30)
# Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
response.raise_for_status()
data = response.json()
df = pd.DataFrame(data)

In [4]:
# Cast both coordinate columns to float in one pass.
df = df.astype({'lat': float, 'long': float})

In [5]:
# Keep only the two coordinate columns; everything else is dropped.
df = df.loc[:, ['lat', 'long']]

In [6]:
# Bounding box (inclusive on all four edges) used to discard out-of-area points.
lat_min, lat_max = 28.40, 28.88
lon_min, lon_max = 76.84, 77.35

in_lat_range = df['lat'].between(lat_min, lat_max)
in_lon_range = df['long'].between(lon_min, lon_max)
df = df[in_lat_range & in_lon_range]

In [None]:
# Row count left after the bounding-box filter.
n_remaining = df.shape[0]
print(f"Remaining points after filtering: {n_remaining}")

Remaining points after filtering: 2629


In [None]:
# Actual extent of the filtered points (as opposed to the requested bounding box).
lat_col, lon_col = df['lat'], df['long']
lat_min1 = lat_col.min()
lat_max1 = lat_col.max()
lon_min1 = lon_col.min()
lon_max1 = lon_col.max()
print(lat_min1, lat_max1, lon_min1, lon_max1)

28.5649967 28.8784733 76.9653197 77.243375


In [7]:
import matplotlib.pyplot as plt

# Raw point map: longitude on x, latitude on y.
fig, ax = plt.subplots(figsize=(8, 8))
ax.scatter(df['long'], df['lat'], s=2, alpha=0.5)
ax.set_title('Crime Locations Within Delhi')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.grid(True)
plt.show()


In [8]:
# Summary statistics of the numeric columns (count, mean, std, quartiles, min/max).
df.describe()

In [9]:
# Missing values per column.
df.isna().sum()

lat     0
long    0
dtype: int64

In [10]:
# Number of rows that exactly repeat an earlier row.
n_duplicates = df.duplicated().sum()
print(n_duplicates)

92


In [11]:
# Drop exact duplicate rows and renumber the index from 0.
df = df.drop_duplicates(ignore_index=True)
n_unique = len(df)
print(f"Data points after removing duplicates: {n_unique}")


In [12]:
import seaborn as sns
import matplotlib.pyplot as plt

# Filled 2-D kernel density estimate of the point locations.
fig, ax = plt.subplots(figsize=(8, 8))
kde_style = dict(cmap="Reds", fill=True, bw_adjust=0.05, levels=20, thresh=0.01)
sns.kdeplot(x=df['long'], y=df['lat'], ax=ax, **kde_style)
ax.set_title('Crime Density in Outer North Delhi')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.grid(True)
plt.show()


In [13]:
# Same density estimate as the previous figure, with the raw points drawn on top.
fig, ax = plt.subplots(figsize=(8, 8))
sns.kdeplot(
    x=df['long'],
    y=df['lat'],
    cmap="Reds",
    fill=True,
    bw_adjust=0.05,
    levels=20,
    thresh=0.01,
    ax=ax,
)
ax.scatter(df['long'], df['lat'], s=5, alpha=0.3, color='black')
ax.set_title('Crime Density + Raw Points')
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.grid(True)
plt.show()


In [14]:
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt

# NOTE(review): k is 50 here while DBSCAN further down runs with min_samples=10;
# the k-distance heuristic is normally read with k equal to min_samples - confirm
# which of the two values is intended.
k = 50

# The haversine metric works on [lat, lon] given in radians.
coords = np.radians(df[['lat', 'long']])
nbrs = NearestNeighbors(n_neighbors=k, metric='haversine')
nbrs.fit(coords)
distances, indices = nbrs.kneighbors(coords)

# Last column = farthest of the k neighbours; 6371 (Earth radius in km) turns
# the angular distance into kilometres.
k_distances = np.sort(distances[:, -1] * 6371)

fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(k_distances)
ax.set_xlabel("Points sorted by distance")
ax.set_ylabel(f"Distance to {k}th nearest neighbor (km)")
ax.set_title("k-distance graph to estimate eps")
ax.grid(True)
plt.show()


In [None]:
EARTH_RADIUS_KM = 6371

# Neighbourhood radius in km, then as the angle (radians) the haversine metric uses.
eps_km = 0.4
eps_rad = eps_km / EARTH_RADIUS_KM

In [None]:
from sklearn.cluster import DBSCAN

# Cluster on [lat, lon] in radians so that eps_rad is interpreted as an angle.
coords = np.radians(df[['lat', 'long']])
db = DBSCAN(eps=eps_rad, min_samples=10, metric='haversine')
df['cluster'] = db.fit_predict(coords)


In [15]:

# Points per cluster label (DBSCAN assigns the label -1 to noise points).
df['cluster'].value_counts()

In [16]:

# Keep only points that were assigned to a cluster (label >= 0).
is_clustered = df['cluster'] >= 0
hot = df[is_clustered]

# Mean position of each cluster.
cluster_centroids = hot.groupby('cluster')[['lat', 'long']].mean().reset_index()

# Number of points in each cluster.
counts = (
    hot['cluster']
    .value_counts()
    .rename_axis('cluster')
    .reset_index(name='crime_count')
)

# One row per cluster, largest cluster first.
centroids = (
    cluster_centroids
    .merge(counts, on='cluster')
    .sort_values('crime_count', ascending=False)
    .reset_index(drop=True)
)

print(centroids.head(8))


In [17]:
import os
from getpass import getpass

from pyngrok import ngrok

# Never paste the token into the notebook: it would be saved (and shared) with
# the file. Read it from the NGROK_AUTHTOKEN environment variable, or prompt
# for it without echoing when the variable is not set.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok auth token: ")
ngrok.set_auth_token(ngrok_token)


In [None]:
pip install fastapi uvicorn scikit-learn pandas numpy httpx



In [None]:
# app.py
from typing import List, Optional

import httpx
import numpy as np
import pandas as pd
from fastapi import FastAPI, HTTPException, Query
from pydantic import BaseModel, Field
from sklearn.cluster import DBSCAN

# FastAPI application object; route handlers are registered on it with
# decorators, and the title/description/version feed the generated API docs.
app = FastAPI(
    title="Crime-Hotspot API",
    description="Given a URL returning crime records, returns top-8 Delhi hotspots.",
    version="1.0"
)

class CrimeRecord(BaseModel):
    """One crime record as delivered by the upstream API.

    Only ``lat`` and ``long`` are required; they arrive as strings and are
    converted to numbers later. Every other field is optional and defaults to
    ``None`` so that a record missing e.g. ``beat`` still validates.
    """

    # Pydantic does not treat underscore-prefixed annotations as fields, and a
    # name like `__v` is additionally name-mangled inside a class body. The
    # upstream `_id` / `__v` keys are therefore mapped onto public field names
    # through aliases.
    record_id: Optional[str] = Field(default=None, alias="_id")
    lat: str
    long: str
    # Explicit `= None`: under pydantic v2 a bare Optional[...] annotation is
    # still a *required* field.
    crime: Optional[str] = None
    beat: Optional[str] = None
    date: Optional[str] = None
    month: Optional[str] = None
    year: Optional[str] = None
    version: Optional[int] = Field(default=None, alias="__v")

def find_hotspots(
    records: List["CrimeRecord"],
    eps_km: float = 0.4,
    min_samples: int = 10,
    top_n: int = 8,
) -> List[dict]:
    """Cluster crime locations with DBSCAN and return the largest clusters.

    Args:
        records: Crime records; only their ``lat`` and ``long`` values are used.
        eps_km: DBSCAN neighbourhood radius in kilometres.
        min_samples: DBSCAN ``min_samples`` value.
        top_n: Maximum number of hotspots to return (largest first).

    Returns:
        A list of dicts with the keys ``cluster``, ``lat``, ``long`` (cluster
        centroid) and ``crime_count``, sorted by ``crime_count`` descending.
        Empty when there are no records, no usable points inside the bounding
        box, or no clusters.
    """
    # An empty list would yield a column-less DataFrame and a KeyError below.
    if not records:
        return []

    # 1) To DataFrame (model_dump() on pydantic v2, dict() on v1).
    rows = [r.model_dump() if hasattr(r, "model_dump") else r.dict() for r in records]
    df = pd.DataFrame(rows)[['lat', 'long']]
    # Unparseable coordinates become NaN instead of raising; NaN never passes
    # the range test below, so such rows are dropped by the filter.
    df = df.apply(pd.to_numeric, errors='coerce')

    # 2) Filter to the Delhi bounding box (inclusive) and de-duplicate locations.
    lat_min, lat_max = 28.4, 28.88
    lon_min, lon_max = 76.84, 77.35
    in_box = (
        df['lat'].between(lat_min, lat_max) &
        df['long'].between(lon_min, lon_max)
    )
    df = df[in_box].drop_duplicates(subset=['lat', 'long']).copy()

    if df.empty:
        return []

    # 3) DBSCAN with the haversine metric: inputs in radians, eps as an angle
    #    (kilometres divided by the Earth radius in km).
    coords = np.radians(df[['lat', 'long']])
    eps_rad = eps_km / 6371.0
    db = DBSCAN(eps=eps_rad, min_samples=min_samples, metric='haversine')
    df['cluster'] = db.fit_predict(coords)

    # Label -1 is DBSCAN noise; keep clustered points only.
    hot = df[df['cluster'] >= 0]
    if hot.empty:
        return []

    # 4) Centroid and size per cluster, largest cluster first.
    cluster_sizes = (
        hot['cluster'].value_counts()
        .rename_axis('cluster')
        .reset_index(name='crime_count')
    )
    centroids = (
        hot.groupby('cluster')[['lat', 'long']].mean().reset_index()
        .merge(cluster_sizes, on='cluster')
        .sort_values('crime_count', ascending=False)
        .reset_index(drop=True)
    )

    return centroids.head(top_n).to_dict(orient='records')


In [None]:
@app.get("/hotspots/")
async def hotspots_from_url(
    source_url: str = Query(
        ...,
        description="Public API endpoint that returns a JSON array of crime records"
    ),
    eps_km: float = Query(0.4, gt=0, description="Radius for DBSCAN (km)"),
    min_samples: int = Query(10, ge=1, description="Min samples per cluster")
):
    """
    Fetches crime data from `source_url`, runs DBSCAN+centroid logic,
    and returns top-8 crime hotspots within Delhi.

    Responds with 400 when `source_url` is not an http(s) URL or the payload
    does not match the expected schema, and with 502 when the upstream request
    fails or returns a non-200 status. Non-positive `eps_km` / `min_samples`
    are rejected by the query validation before this function runs.
    """
    # NOTE(security): the server fetches a caller-supplied URL, which is an
    # SSRF vector. Only the scheme is checked here; restrict the allowed hosts
    # before exposing this endpoint publicly.
    if not source_url.lower().startswith(("http://", "https://")):
        raise HTTPException(
            status_code=400,
            detail="source_url must be an http(s) URL"
        )

    # 1) Fetch the data; connection errors and timeouts become a 502 instead
    #    of an unhandled exception.
    try:
        async with httpx.AsyncClient(timeout=10) as client:
            resp = await client.get(source_url)
    except (httpx.HTTPError, httpx.InvalidURL) as e:
        raise HTTPException(
            status_code=502,
            detail=f"Failed to fetch data from {source_url}: {e}"
        ) from e
    if resp.status_code != 200:
        raise HTTPException(
            status_code=502,
            detail=f"Failed to fetch data from {source_url}: {resp.status_code}"
        )

    # 2) Parse JSON into our model
    try:
        payload = resp.json()
        if not isinstance(payload, list):
            raise ValueError("expected a JSON array of records")
        records = [CrimeRecord(**item) for item in payload]
    except Exception as e:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid JSON schema: {e}"
        ) from e

    # 3) Compute hotspots
    result = find_hotspots(records, eps_km=eps_km, min_samples=min_samples)
    return {"hotspots": result}

In [19]:

!pip install nest_asyncio pyngrok

from pyngrok import ngrok
import uvicorn

nest_asyncio.apply()

#    If you don’t need external access, you can skip the ngrok lines.
public_url = ngrok.connect(8000).public_url
print(f"🚀 Public URL: {public_url}/hotspots/?source_url=<YOUR_CRIME_API>")

uvicorn.run(app, host="0.0.0.0", port=8000)
