In [34]:
# %pip install OSMPythonTools
# %pip install overpy geopandas matplotlib

In [35]:
import json
import sqlite3
import time
from typing import Dict, List

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from census import Census
from geopy.geocoders import Nominatim
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

In [36]:
census_api_key = "8f75f43b286ec9dca28f4cf8040e681b096ae03e"

# Neighborhood Analysis and Quality of Life Project
## Overview

You are an urban data scientist analyzing neighborhood characteristics and quality of life metrics across different areas. Your task is to build a comprehensive geospatial analysis using OpenStreetMap data and Census information.

### Requirements:
1. Geospatial Data Collection
- Use OpenStreetMap API to collect Points of Interest (POIs) data
- Calculate walkability scores based on POI density and diversity 
- Get neighborhood boundaries and geographic features
- Collect Census demographic and socioeconomic indicators get free key from https://api.census.gov/data/key_signup.html

2. Data Processing & Integration 
- Clean and standardize location data
- Calculate spatial metrics (e.g., POI density, distances)
- Join Census data with geographic boundaries
- Handle missing values and outliers
- Store all collected data in SQLite database tables with appropriate schemas

3. Analysis & Modeling
- Develop walkability and livability scores
- Cluster neighborhoods based on characteristics 
- Build ML models to predict quality of life metrics
- Analyze spatial patterns and relationships
- Query and analyze data from SQLite database

4. Insights & Visualization
- Create maps showing spatial distributions
- Visualize relationships between variables
- Generate neighborhood profiles and rankings
- Document key findings and recommendations
- Export analysis results to SQLite for persistence


In [37]:
# # Create database connection
# conn = sqlite3.connect('neighborhood_analysis.db')
# cursor = conn.cursor()
#
# # Create tables HERE
# cursor.execute('''
# CREATE TABLE ...
# )''')
#
# cursor.execute('''
# CREATE TABLE ...
# )''')
#
# cursor.execute('''
# CREATE TABLE ...
# )''')

In [52]:
## Part 2: Data Collection Tasks
import overpy

api = overpy.Overpass()


# todo: could skip and get amentities

def get_neighborhood_boundaries(city: str, state: str) -> pd.DataFrame:
    """
    Get neighborhood boundaries using OpenStreetMap API.

    Parameters:
        city: Name of the city
        state: State name

    Returns:
        DataFrame containing neighborhood names and their bounding boxes (min x, min y, max x, max y).
    """
    # Overpass API query to get neighborhood boundaries
    query = f"""
    [out:json][timeout:25];
    area["name"="{state}"]->.stateArea;
    relation["name"="{city}"]["admin_level"="8"](area.stateArea);
    out body;
    >;
    out skel qt;
    """

    try:
        # Execute the query
        result = api.query(query)

        # Collect neighborhood data
        data = []
        for relation in result.relations:
            # Get the name of the neighborhood (if available)
            name = relation.tags.get("name", "Unknown")

            # Initialize bounding box values
            min_lon, min_lat = float('inf'), float('inf')
            max_lon, max_lat = float('-inf'), float('-inf')

            # Extract coordinates and update bounding box
            for member in relation.members:
                if member.role == "outer" and isinstance(member, overpy.RelationWay):
                    way = member.resolve()
                    for node in way.nodes:
                        lat, lon = node.lat, node.lon
                        min_lon = min(min_lon, lon)
                        max_lon = max(max_lon, lon)
                        min_lat = min(min_lat, lat)
                        max_lat = max(max_lat, lat)

            # Append bounding box data
            data.append({
                "name": name,
                "min_lon": min_lon,
                "min_lat": min_lat,
                "max_lon": max_lon,
                "max_lat": max_lat
            })

        # Convert the data to a pandas DataFrame
        df = pd.DataFrame(data)
        return df

    except Exception as e:
        print(f"Error: {e}")
        return pd.DataFrame()


def get_points_of_interest(bbox: tuple) -> pd.DataFrame:
    """
    Get POIs (schools, restaurants, parks, etc) using OpenStreetMap API

    Parameters:
        bbox: Tuple of (min_lat, min_lon, max_lat, max_lon) defining search area

    Returns:
        DataFrame containing POI locations and attributes
    """
    # Unpack bounding box
    min_lat, min_lon, max_lat, max_lon = bbox

    # Overpass API query to fetch POIs
    query = f"""
    [out:json][timeout:25];
    (
      node["amenity"](bbox:{min_lat},{min_lon},{max_lat},{max_lon});
      way["amenity"](bbox:{min_lat},{min_lon},{max_lat},{max_lon});
      relation["amenity"](bbox:{min_lat},{min_lon},{max_lat},{max_lon});
    );
    out center; // Return center for ways and relations
    """

    try:
        # Execute the query
        result = api.query(query)

        # Collect POI data
        data = []

        # Nodes
        for node in result.nodes:
            data.append({
                "type": "node",
                "id": node.id,
                "lat": node.lat,
                "lon": node.lon,
                "tags": node.tags
            })

        # Ways
        for way in result.ways:
            center = way.center
            data.append({
                "type": "way",
                "id": way.id,
                "lat": center.lat if center else None,
                "lon": center.lon if center else None,
                "tags": way.tags
            })

        # Relations
        for relation in result.relations:
            center = relation.center
            data.append({
                "type": "relation",
                "id": relation.id,
                "lat": center.lat if center else None,
                "lon": center.lon if center else None,
                "tags": relation.tags
            })

        # Convert the data to a pandas DataFrame
        df = pd.DataFrame(data)
        return df

    except Exception as e:
        print(f"Error: {e}")
        return pd.DataFrame()


def get_census_data(city: str, state: str, api_key: str) -> pd.DataFrame:
    """
    Get demographic and socioeconomic Census data
    
    Parameters:
        city: Name of the city
        state: State abbreviation
        api_key: Census API key
        
    Returns:
        DataFrame containing population, income, education and other metrics
    """
    # Your code here
    pass


def calculate_walkability(pois_df: pd.DataFrame) -> float:
    """
    Calculate walkability score based on POI density and diversity
    
    Parameters:
        pois_df: DataFrame containing POI data
        
    Returns:
        Walkability score from 0-100
    """
    # Your code here
    pass


'''
[out:json][timeout:25];
    (
      node["amenity"](bbox:32.5347979, -117.3098161, 33.1141940, -116.9057417);
      way["amenity"](bbox:32.5347979, -117.3098161, 33.1141940, -116.9057417);
      relation["amenity"](bbox:32.5347979, -117.3098161, 33.1141940, -116.9057417);
    );
    out center; // Return center for ways and relations
'''

In [50]:
get_neighborhood_boundaries("San Diego", "California")

Unnamed: 0,name,min_lon,min_lat,max_lon,max_lat
0,San Diego,-117.3098161,32.5347979,-116.9057417,33.114194


In [51]:


my_bbox = (-117.3098161,)
get_points_of_interest()

TypeError: get_points_of_interest() missing 1 required positional argument: 'bbox'

In [None]:


## Part 3: Data Integration and Analysis
# 1. Join restaurant and demographic data
# 2. Clean and standardize formats
# 3. Handle missing values

def integrate_data():
    """
    Join and clean data from different sources
    """
    # Your code here
    pass


In [None]:
## Part 4: Analysis and Modeling
# 1. Develop walkability and livability scores
# 2. Cluster neighborhoods based on characteristics
# 3. Build ML models to predict quality of life metrics
# 4. Analyze spatial patterns and relationships
# 5. Query and analyze data from SQLite database
