In [21]:
import json
from typing import List, Dict, Optional, Tuple
import time
from shapely.geometry import Polygon, Point
import overpy
import os

In [22]:
with open("bariCoordinates.json") as f:
    COORDINATES = json.load(f)

In [23]:
# commented were not found
AMENITIES = {
    # # Food & Drink
    # 'bar': 'Establishments selling alcoholic drinks with vibrant atmosphere',
    # 'pub': 'Traditional establishments with alcoholic drinks and food',
    # # 'restaurant': 'Places serving meals with seating',
    # 'cafe': 'Informal places serving beverages and light meals',
    # 'fast_food': 'Quick service food establishments',
    # # 'biergarten': 'Outdoor beer gardens',
    'ice_cream': 'Ice cream parlors',
    'food_court': 'Areas with multiple food vendors',
    # # Entertainment & Nightlife
    'nightclub': 'Places for dancing and drinking at night',
    # # 'social_club': 'Member-only social establishments',
    # 'casino': 'Gambling establishments',
    # 'cinema': 'Movie theaters',
    # #### ADDED
    # 'arts_centre': 'Venues for various arts performances',
    # 'music_venue': 'Indoor venues for live contemporary music',
    # # 'community_centre': 'Community centers for local events',
    # # 'social_centre': 'Places for free and not-for-profit activities',
    'events_venue': 'Buildings specifically for organizing events',
    # 'marketplace': 'Public marketplaces for daily or weekly trading',
    # # 'place_of_worship': 'Churches, mosques, temples and other religious buildings',
    # # 'monastery': 'Monasteries and religious living quarters',
    'internet_cafe': 'Cafes providing internet access',
}

In [24]:
API = overpy.Overpass()

In [25]:
def create_polygon(coordinates: List) -> Polygon:
    """Create a Polygon from coordinates."""
    polygon = Polygon(coordinates)
    return polygon

In [26]:
def get_polygon_coords_string(polygon: Polygon) -> str:
    """Convert polygon coordinates to Overpass API polygon format."""
    coords = list(polygon.exterior.coords)
    
    # Convert to Overpass polygon format: "lat1 lon1 lat2 lon2 lat3 lon3 ..."
    coord_pairs = []
    for lon, lat in coords:
        coord_pairs.extend([str(lat), str(lon)])
    
    polygon_string = " ".join(coord_pairs)
    return polygon_string

In [27]:
def point_in_polygon(polygon: Polygon, lat: float, lon: float) -> bool:
    """Check if a point is within the polygon."""
    point = Point(lon, lat)
    return polygon.contains(point)

In [28]:
def is_handled_tag(tag_key: str) -> bool:
    """Check if a tag is already handled in the structured extraction."""
    handled_prefixes = [
        'name', 'alt_name', 'official_name', 'short_name', 'loc_name', 'old_name',
        'addr:', 'contact:', 'phone', 'email', 'website', 'facebook', 'instagram',
        'opening_hours', 'cuisine', 'diet:', 'outdoor_seating', 'wheelchair', 
        'internet_access', 'smoking', 'takeaway', 'delivery', 'capacity',
        'description', 'operator', 'brand', 'building', 'level', 'indoor',
        'fee', 'charge', 'payment:', 'accepted_cards', 'cash',
        'air_conditioning', 'heating', 'reservation', 'dress_code',
        'min_age', 'max_age', 'toilets', 'baby_feeding', 'changing_table',
        'parking', 'bicycle_parking', 'drive_through', 'drive_in',
        'amenity'
    ]
    
    for prefix in handled_prefixes:
        if tag_key.startswith(prefix) or tag_key == prefix.rstrip(':'):
            return True
    
    return False

In [29]:
def extract_address(tags: Dict) -> Optional[Dict]:
    """Extract comprehensive address information."""
    address = {}
    address_keys = {
        'addr:street': 'street',
        'addr:housenumber': 'housenumber',
        'addr:housename': 'housename',
        'addr:postcode': 'postcode',
        'addr:city': 'city',
        'addr:country': 'country',
        'addr:state': 'state',
        'addr:province': 'province',
        'addr:district': 'district',
        'addr:suburb': 'suburb',
        'addr:hamlet': 'hamlet',
        'addr:place': 'place',
        'addr:unit': 'unit',
        'addr:floor': 'floor',
        'addr:door': 'door',
        'addr:full': 'full_address'
    }
    
    for osm_key, clean_key in address_keys.items():
        if osm_key in tags:
            address[clean_key] = tags[osm_key]
    
    return address if address else None

def extract_contact_info(tags: Dict) -> Optional[Dict]:
    """Extract contact information."""
    contact = {}
    contact_keys = [
        'phone', 'mobile', 'fax', 'email', 'website', 'url',
        'contact:phone', 'contact:mobile', 'contact:fax', 'contact:email', 
        'contact:website', 'contact:facebook', 'contact:instagram', 
        'contact:twitter', 'contact:youtube', 'facebook', 'instagram'
    ]
    
    for key in contact_keys:
        if key in tags:
            clean_key = key.replace('contact:', '')
            contact[clean_key] = tags[key]
    
    return contact if contact else None

def extract_business_info(tags: Dict) -> Optional[Dict]:
    """Extract business operating information."""
    business = {}
    business_keys = [
        'opening_hours', 'opening_hours:drive_through', 'opening_hours:kitchen',
        'operator', 'owner', 'ref', 'source', 'start_date', 'end_date',
        'description', 'note', 'fixme', 'capacity', 'stars', 'rooms'
    ]
    
    for key in business_keys:
        if key in tags:
            business[key] = tags[key]
    
    return business if business else None

def extract_accessibility_info(tags: Dict) -> Optional[Dict]:
    """Extract accessibility information."""
    accessibility = {}
    accessibility_keys = [
        'wheelchair', 'wheelchair:description', 'blind', 'deaf',
        'level', 'elevator', 'ramp', 'tactile_paving',
        'step_count', 'handrail', 'width', 'door:width'
    ]
    
    for key in accessibility_keys:
        if key in tags:
            accessibility[key] = tags[key]
    
    return accessibility if accessibility else None


def extract_food_drink_info(tags: Dict) -> Optional[Dict]:
    """Extract food and drink specific information."""
    food_drink = {}
    
    # Cuisine and dietary information
    cuisine_keys = ['cuisine', 'drink', 'brewery', 'microbrewery']
    
    # Dietary restrictions and options
    diet_keys = [k for k in tags.keys() if k.startswith('diet:')]
    
    # Service types
    service_keys = [
        'outdoor_seating', 'takeaway', 'delivery', 'drive_through', 'drive_in',
        'reservation', 'smoking', 'bar', 'restaurant', 'self_service',
        'breakfast', 'lunch', 'dinner', 'happy_hour'
    ]
    
    all_food_keys = cuisine_keys + diet_keys + service_keys
    
    for key in all_food_keys:
        if key in tags:
            food_drink[key] = tags[key]
    
    return food_drink if food_drink else None


def extract_location_info(tags: Dict) -> Optional[Dict]:
    """Extract location and building information."""
    location = {}
    location_keys = [
        'building', 'building:levels', 'building:material', 'building:colour',
        'level', 'indoor', 'room', 'location', 'addr:floor',
        'roof:shape', 'roof:material', 'roof:colour', 'height'
    ]
    
    for key in location_keys:
        if key in tags:
            location[key] = tags[key]
    
    return location if location else None


def extract_entertainment_info(tags: Dict) -> Optional[Dict]:
    """Extract entertainment specific information."""
    entertainment = {}
    entertainment_keys = [
        'dance:teaching', 'dance:style', 'music:genre', 'live_music',
        'karaoke', 'dj', 'game', 'gambling', 'adult', 'min_age', 'max_age',
        'dress_code', 'entrance_fee', 'show', 'performance',
        'screen', 'projection', '3d', 'imax'
    ]
    
    for key in entertainment_keys:
        if key in tags:
            entertainment[key] = tags[key]
    
    return entertainment if entertainment else None


def extract_payment_info(tags: Dict) -> Optional[Dict]:
    """Extract payment and pricing information."""
    payment = {}
    
    # Payment methods
    payment_keys = [k for k in tags.keys() if k.startswith('payment:')]
    
    # Pricing
    pricing_keys = [
        'fee', 'charge', 'price', 'cost', 'toll', 'entrance_fee',
        'membership_fee', 'accepted_cards', 'cash'
    ]
    
    all_payment_keys = payment_keys + pricing_keys
    
    for key in all_payment_keys:
        if key in tags:
            payment[key] = tags[key]
    
    return payment if payment else None


def extract_services_info(tags: Dict) -> Optional[Dict]:
    """Extract additional services and facilities."""
    services = {}
    services_keys = [
        'internet_access', 'internet_access:fee', 'internet_access:ssid',
        'wifi', 'computer', 'atm', 'toilets', 'baby_feeding', 'changing_table',
        'shower', 'lockers', 'lost_property', 'luggage_storage',
        'parking', 'parking:fee', 'bicycle_parking', 'motorcycle_parking',
        'air_conditioning', 'heating', 'fireplace', 'garden', 'terrace',
        'balcony', 'playground', 'dog', 'pets_allowed', 'service',
        'self_service', 'full_service', 'valet', 'cloakroom'
    ]
    
    for key in services_keys:
        if key in tags:
            services[key] = tags[key]
    
    return services if services else None

In [30]:
def extract_amenity_info(osm_object, obj_type: str, 
                        lat: Optional[float] = None, 
                        lon: Optional[float] = None) -> Dict:
    """Extract comprehensive information from an OSM object."""
    # Get coordinates
    if obj_type == 'node':
        coordinates = {'lat': float(osm_object.lat), 'lon': float(osm_object.lon)}
    else:
        coordinates = {'lat': lat, 'lon': lon} if lat and lon else None
    
    amenity_info = {
        'osm_id': osm_object.id,
        'osm_type': obj_type,
        'coordinates': coordinates,
        
        # Basic identification
        'name': osm_object.tags.get('name', None),
        'amenity': osm_object.tags.get('amenity'),
        
        # Alternative names
        'alt_name': osm_object.tags.get('alt_name'),
        'official_name': osm_object.tags.get('official_name'),
        'short_name': osm_object.tags.get('short_name'),
        'local_name': osm_object.tags.get('loc_name'),
        'old_name': osm_object.tags.get('old_name'),
        'brand': osm_object.tags.get('brand'),
        
        # Structured information
        'address': extract_address(osm_object.tags),
        'contact': extract_contact_info(osm_object.tags),
        'business_info': extract_business_info(osm_object.tags),
        'accessibility': extract_accessibility_info(osm_object.tags),
        'food_drink': extract_food_drink_info(osm_object.tags),
        'location_info': extract_location_info(osm_object.tags),
        'entertainment': extract_entertainment_info(osm_object.tags),
        'payment': extract_payment_info(osm_object.tags),
        'services': extract_services_info(osm_object.tags),
        
        # All other tags
        'other_tags': {k: v for k, v in osm_object.tags.items() 
                      if not is_handled_tag(k)}
    }
    
    return amenity_info

In [31]:
def build_overpass_query(amenity_type: str, polygon_coords_string: str) -> str:
    """Build Overpass API query for a specific amenity type."""
    return f"""
    [out:json][timeout:60];
    (
      node["amenity"="{amenity_type}"](poly:"{polygon_coords_string}");
      way["amenity"="{amenity_type}"](poly:"{polygon_coords_string}");
      relation["amenity"="{amenity_type}"](poly:"{polygon_coords_string}");
    );
    out center meta;
    """

In [32]:
def process_way_center(way) -> Optional[Tuple[float, float]]:
    """Calculate center coordinates for a way object."""
    if hasattr(way, 'center_lat') and hasattr(way, 'center_lon'):
        return float(way.center_lat), float(way.center_lon)
    elif hasattr(way, 'nodes') and way.nodes:
        total_lat = sum(float(node.lat) for node in way.nodes)
        total_lon = sum(float(node.lon) for node in way.nodes)
        return total_lat / len(way.nodes), total_lon / len(way.nodes)
    elif hasattr(way, 'nd') and way.nd:
        total_lat = sum(float(node.lat) for node in way.nd)
        total_lon = sum(float(node.lon) for node in way.nd)
        return total_lat / len(way.nd), total_lon / len(way.nd)
    return None


def process_relation_center(relation) -> Optional[Tuple[float, float]]:
    """Find center coordinates for a relation object."""
    if hasattr(relation, 'center_lat') and hasattr(relation, 'center_lon'):
        return float(relation.center_lat), float(relation.center_lon)
    elif hasattr(relation, 'members') and relation.members:
        for member in relation.members:
            if hasattr(member, 'lat') and hasattr(member, 'lon'):
                return float(member.lat), float(member.lon)
    return None

In [33]:
def query_amenity(api: overpy.Overpass, amenity_type: str, 
                 polygon_coords_string: str) -> List[Dict]:
    """Query OSM for a specific amenity type within a polygon."""
    if not polygon_coords_string:
        print("Error: No polygon coordinates available")
        return []
    
    overpass_query = build_overpass_query(amenity_type, polygon_coords_string)
    
    try:
        print(f"Querying {amenity_type} within custom polygon...")
        result = api.query(overpass_query)
        
        amenities = []
        
        # Process nodes
        for node in result.nodes:
            amenity_info = extract_amenity_info(node, 'node')
            amenities.append(amenity_info)
        
        # Process ways
        for way in result.ways:
            center = process_way_center(way)
            if center:
                center_lat, center_lon = center
                amenity_info = extract_amenity_info(way, 'way', center_lat, center_lon)
                amenities.append(amenity_info)
        
        # Process relations
        for relation in result.relations:
            center = process_relation_center(relation)
            if center:
                center_lat, center_lon = center
                amenity_info = extract_amenity_info(relation, 'relation', center_lat, center_lon)
                amenities.append(amenity_info)
        
        print(f"‚úÖ Found {len(amenities)} {amenity_type}(s) in custom polygon area")
        return amenities
        
    except Exception as e:
        print(f"‚ùå Error querying {amenity_type}: {e}")
        print(f"   This might be due to:")
        print(f"   - High server load (try again later)")
        print(f"   - Network connectivity issues")
        print(f"   - Overpass API timeout")
        print(f"   - Invalid polygon coordinates")
        return []

In [34]:
def save_to_json(amenities: List[Dict], amenity_type: str, 
                polygon: Polygon, filename: Optional[str] = None):
    """Save amenity data to a JSON file."""
    summary = {
        'query_info': {
            'amenity_type': amenity_type,
            'total_found': len(amenities),
            'query_timestamp': time.strftime("%Y-%m-%d %H:%M:%S"),
            'polygon_bounds': polygon.bounds if polygon else None
        },
        'amenities': amenities
    }
    
    try:
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(summary, f, indent=2, ensure_ascii=False)
        print(f"‚úÖ {len(amenities)} {amenity_type}(s) with detailed info saved to {filename}")
    except Exception as e:
        print(f"‚ùå Error saving {amenity_type} data: {e}")

In [35]:
def find_all_amenities(coordinates: List, amenities_dict: Dict, 
                      api: overpy.Overpass, searchIfAlreadyFound = False, delay: int = 2):
    """Find all amenities in the specified polygon area."""
    print("üîç Finding amenities with comprehensive details in custom polygon area...")
    print("=" * 80)
    
    # Create polygon
    polygon = create_polygon(coordinates)
    polygon_coords_string = get_polygon_coords_string(polygon)
    
    print(f"üìç Polygon area: ~{polygon.area * 111000 * 111000:.0f} square meters")
    print(f"üîé Searching for {len(amenities_dict)} amenity types with detailed tags...")
    print("-" * 50)
    
    total_found = 0
    
    for amenity_type, description in amenities_dict.items():
        print(f"\nüîé Searching for {amenity_type}: {description}")
        filename = f"custom_area_{amenity_type}s_detailed.json"
        if searchIfAlreadyFound or not os.path.exists(filename):
            amenities = query_amenity(api, amenity_type, polygon_coords_string)
            
            if amenities:
                save_to_json(amenities, amenity_type, polygon, filename)
                total_found += len(amenities)
            else:
                print(f"   No {amenity_type}s found in this polygon area")
            
            # Be respectful to the API
            time.sleep(delay)
    
    print("\n" + "=" * 80)
    print(f"üéâ Total amenities found: {total_found}")
    print(f"üìÅ Created detailed JSON files for each amenity type")
    print("üí° Files contain comprehensive OSM tag information")
    print("üìä Each file includes query metadata and structured data")

In [36]:
find_all_amenities(COORDINATES, AMENITIES, API, True)

üîç Finding amenities with comprehensive details in custom polygon area...
üìç Polygon area: ~154441969 square meters
üîé Searching for 5 amenity types with detailed tags...
--------------------------------------------------

üîé Searching for ice_cream: Ice cream parlors
Querying ice_cream within custom polygon...
‚úÖ Found 12 ice_cream(s) in custom polygon area
‚úÖ 12 ice_cream(s) with detailed info saved to custom_area_ice_creams_detailed.json

üîé Searching for food_court: Areas with multiple food vendors
Querying food_court within custom polygon...
‚úÖ Found 1 food_court(s) in custom polygon area
‚úÖ 1 food_court(s) with detailed info saved to custom_area_food_courts_detailed.json

üîé Searching for nightclub: Places for dancing and drinking at night
Querying nightclub within custom polygon...
‚úÖ Found 3 nightclub(s) in custom polygon area
‚úÖ 3 nightclub(s) with detailed info saved to custom_area_nightclubs_detailed.json

üîé Searching for events_venue: Buildings specific

In [37]:
otherKeys = []
result = []
for file in os.listdir(os.getcwd()):
    if file.startswith("custom_area"):
        with open(file, encoding='utf-8') as f:
            queryResult = json.load(f)

        category = " ".join(queryResult["query_info"]["amenity_type"].split("_"))

        for amenity in queryResult["amenities"]:
            out = {
                "Spazio": amenity["name"],
                "Categoria": category,
                "latitudine": amenity["coordinates"]["lat"],
                "longitudine": amenity["coordinates"]["lon"]
            }
            
            for k in amenity:
                if (k not in ["name", "amenity", "coordinates", "osm_type", "other_tags", "osm_id", "official_name", "payment", "food_drink"]):
                    if amenity[k] is not None:
                        if k not in otherKeys:
                            otherKeys.append(k) 
                        if isinstance(amenity[k], str):
                            out[k] = amenity[k]
                        elif isinstance(amenity[k], dict):
                            out[k] = " ".join([str(v) for v in amenity[k].values()])
                        
            result.append(out)

In [38]:
otherKeys, result

(['accessibility',
  'address',
  'contact',
  'business_info',
  'location_info',
  'services',
  'alt_name',
  'brand',
  'old_name',
  'entertainment'],
 [{'Spazio': 'Spaccabari',
   'Categoria': 'arts centre',
   'latitudine': 41.1282103,
   'longitudine': 16.8728567,
   'accessibility': 'no'},
  {'Spazio': 'BLUorg',
   'Categoria': 'arts centre',
   'latitudine': 41.121101,
   'longitudine': 16.8756672},
  {'Spazio': 'Spazio 13',
   'Categoria': 'arts centre',
   'latitudine': 41.1246725,
   'longitudine': 16.8560676,
   'address': 'Via Colonello de Cristoforis 8 70123 Bari',
   'contact': 'https://spazio13.org',
   'business_info': 'Mo-Fr 10:00-20:00',
   'accessibility': 'yes'},
  {'Spazio': "Ipogeo Dell'Arte",
   'Categoria': 'arts centre',
   'latitudine': 41.1626943,
   'longitudine': 16.7482118,
   'address': 'Via Pisa 4 70127 Santo Spirito',
   'contact': 'https://www.leonardobasile.it/index.htm',
   'accessibility': 'no -1',
   'location_info': '-1'},
  {'Spazio': 'Spazio 

In [39]:
traduzioni = {
    'arts centre': 'Centro culturale',
    'bar': 'Bar',
    'cafe': 'Caff√®',
    'casino': 'Casin√≤',
    'cinema': 'Cinema',
    'community centre': 'Centro sociale',
    'events venue': 'Sede eventi',
    'fast food': 'Fast food',
    'food court': 'Area ristorazione',
    'ice cream': 'Gelateria',
    'internet cafe': 'Internet caf√©',
    'marketplace': 'Mercato',
    'monastery': 'Monastero',
    'nightclub': 'Discoteca',
    'place of worship': 'Luogo di culto',
    'pub': 'Pub',
    'restaurant': 'Ristorante',
    'music venue': 'Locale musicale',
    'social centre': 'Centro sociale',
}

# Per applicare le traduzioni ai tuoi dati:
for x in result:
    x["Categoria"] = traduzioni.get(x["Categoria"], x["Categoria"])

In [42]:
with open ("Amenities.json", "w", encoding='utf-8') as f:
    json.dump(result, f, indent=4, ensure_ascii=False)