In [1]:
#!/usr/bin/python
# -*- coding: utf-8 -*-
import pandas as pd
import os
import sys
from pathlib import Path
import time
import numpy as np
import datetime
import overpy
import geopandas as gpd
import requests
import json
import numpy as np
import matplotlib.pyplot as plt

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

laptop = 'forgach1mar822'

## Getting data from overpass API

#### Required data
- Bus stop
- Tram stop
- Trolleybus stop
- Subway stop
- Light rail stop (HÉV)
- School, High school, University
- Café, restaurant
- Shopping mall


##### Final OVERPASS API QUERY
- public_transport_platform_query = """area[name="Budapest"]->.Bp; node(area.Bp)["public_transport"="platform"];out meta; """ <br>
- public_transport_stop_query     = """area[name="Budapest"]->.Bp; node(area.Bp)["public_transport"="stop_position"];out meta; """ <br>
- cafe_query                      = """area[name="Budapest"]->.Bp; node(area.Bp)["amenity"="cafe"];out meta; """ <br>
- restaurant_query                = """area[name="Budapest"]->.Bp; node(area.Bp)["amenity"="restaurant"];out meta; """ <br>
- fast_food_query                 = """area[name="Budapest"]->.Bp; node(area.Bp)["amenity"="fast_food"];out meta; """ <br>
- pharmacy_query                  = """area[name="Budapest"]->.Bp; node(area.Bp)["amenity"="pharmacy"];out meta; """ <br>
- cinema_query                    = """area[name="Budapest"]->.Bp; node(area.Bp)["amenity"="cinema"];out meta; """ <br>
- bank_query                      = """area[name="Budapest"]->.Bp; node(area.Bp)["amenity"="bank"];out meta; """ <br>
- atm_query                       = """area[name="Budapest"]->.Bp; node(area.Bp)["amenity"="atm"];out meta; """ <br>
- post_office_query               = """area[name="Budapest"]->.Bp; node(area.Bp)["amenity"="post_office"];out meta; """ <br>
- theatre_query                   = """area[name="Budapest"]->.Bp; node(area.Bp)["amenity"="theatre"];out meta; """ <br>
- pub_query                       = """area[name="Budapest"]->.Bp; node(area.Bp)["amenity"="pub"];out meta; """ <br>
- bar_query                       = """area[name="Budapest"]->.Bp; node(area.Bp)["amenity"="bar"];out meta; """ <br>
- bakery_query                    = """area[name="Budapest"]->.Bp; node(area.Bp)["shop"="bakery"];out meta; """ <br>

#### Overpass query

In [12]:
overpass_url = "http://overpass-api.de/api/interpreter"
overpass_query = """
[out:json];
(
area[name="Budapest"]->.Bp;
  node["amenity"="cinema"](area.Bp);
  node["public_transport"="platform"](area.Bp);
  node["public_transport"="stop_position"](area.Bp);
  node["amenity"="cafe"](area.Bp);
  node["amenity"="restaurant"](area.Bp);
  node["amenity"="fast_food"](area.Bp);
  node["amenity"="pharmacy"](area.Bp);
  node["amenity"="bank"](area.Bp);
  node["amenity"="atm"](area.Bp);
  node["amenity"="post_office"](area.Bp);
  node["amenity"="theatre"](area.Bp);
  node["amenity"="pub"](area.Bp);
  node["amenity"="bar"](area.Bp);
  node["shop"="bakery"](area.Bp);
);
out center;
"""
response = requests.get(overpass_url, 
                        params={'data': overpass_query})
data = response.json()

In [13]:
overpass_df = pd.json_normalize(data['elements'])
overpass_df.head(2)
len(overpass_df)

Unnamed: 0,type,id,lat,lon,tags.light_rail,tags.local_ref,tags.name,tags.operator,tags.passenger_information_display,tags.public_transport,tags.railway,tags.ref:mav,tags.train,tags.amenity,tags.start_date,tags.wheelchair,tags.wikidata,tags.wikipedia,tags.old_name,tags.bench,tags.bus,tags.highway,tags.ref:bkk,tags.shelter,tags.verified,tags.stopColorType,tags.tram,tags.drive_through,tags.addr:city,tags.addr:housenumber,tags.addr:postcode,tags.addr:street,tags.contact:phone,tags.contact:website,tags.opening_hours,tags.payment:american_express,tags.payment:cash,tags.payment:maestro,tags.payment:mastercard,tags.payment:visa,tags.payment:visa_electron,tags.atm,tags.brand,tags.website,tags.bin,tags.departures_board,tags.lit,tags.network,tags.surface,tags.tactile_paving,tags.covered,tags.old_name:1948-1990,tags.alt_name,tags.capacity,tags.dog,tags.email,tags.facebook,tags.internet_access,tags.note,tags.phone,tags.screen,tags.addr:country,tags.ref:vatin:hu,tags.branch,tags.brand:wikidata,tags.brand:wikipedia,tags.contact:email,tags.contact:facebook,tags.contact:instagram,tags.contact:youtube,tags.operator:addr,tags.payment:debit_cards,tags.ref:vatin,tags.source:www.posta.hu:date,tags.ref,tags.created_by,tags.layer,tags.description,tags.level,tags.healthcare,tags.name:de,tags.name:en,tags.name:hu,tags.cuisine,tags.wheelchair:description,tags.name:it,tags.air_conditioning,tags.bic,tags.ref:HU:company,tags.source:www.cib.hu:date,tags.outdoor_seating,tags.toilets:wheelchair,tags.opening_hours:covid19,tags.source:www.budapestbank.hu:date,tags.short_name,tags.takeaway,tags.wifi,tags.internet_access:fee,tags.self_service,tags.toilets,tags.smoking,tags.trolleybus,tags.lift,tags.lines,tags.chip_scanner,tags.diet:vegetarian,tags.diet:vegan,tags.lunch,tags.survey:date,tags.stop_date,tags.shop,tags.craft,tags.payment:credit_cards,tags.payment:contactless,tags.payment:mastercard_contactless,tags.payment:mastercard_electronic,tags.theatre:genre,tags.entrance,tags.disused:contact:website,tags.food,tags.delivery,tags.fixme,tags.is_in,tags.url,tags.contact:fax,tags.name:fr,tags.name:ru,tags.name:zh,tags.subway,tags.old_name:1990-2019,tags.old_name:1984-2020,tags.ref:MAV,tags.opening_hours:signed,tags.fax,tags.line,tags.lines_1,tags.waste_basket,tags.source,tags.old_name:1949-1990,tags.official_name,tags.internet_access:ssid,tags.lunch:buffet,tags.lunch:menu,tags.lunch:menu:course:dessert,tags.lunch:menu:course:main,tags.lunch:menu:course:soup,tags.lunch:menu:url,tags.payment:szep,tags.old_alt_name,tags.free_refill,tags.last_checked,tags.cash,tags.brewery,tags.amenity_1,tags.animal,tags.payment:visa_debit,tags.club,tags.owncup,tags.owncup:discount,tags.lunch:menu:choices,tags.payment:coins,tags.int_name,tags.reservation,tags.free_refill_note,tags.tourism,tags.check_date:opening_hours,tags.source:opening_hours,tags.dispensing,tags.railway:ref,tags.website_1,tags.payment:meal_voucher,tags.diet:dairy_free,tags.diet:lactose_free,tags.payment:cryptocurrencies,tags.payment:electronic_purses,tags.sport,tags.opening_hours:kitchen,tags.background_music,tags.highchair,tags.contact:mobile,tags.internet_access:ssid_1,tags.internet_access:ssid_2,tags.lunch:menu:subscription,tags.lunch:menu:takeaway,tags.contact:foursquare,tags.check_date,tags.currency:HUF,tags.disused:amenity,tags.architect,tags.contact:website:2,tags.name:ka,tags.noplatform,tags.addr:subdistrict,tags.addr:suburb,tags.old_postal_code,tags.bus_routes,tags.ref:bkk2,tags.alt_name:ru,tags.accuracy,tags.public_transport:version,tags.mapillary,tags.abandoned:amenity,tags.dress_code,tags.opening_hours:url,tags.ref:ruian:addr,tags.old_name:-2013-09-01,tags.crossing,tags.addr:interpolation,tags.payment:notes,tags.payment:telephone_cards,tags.designation,tags.contact:phone_1,tags.name:ko,tags.indoor,tags.lunch:menu:publish_time,tags.addr:place,tags.contact:phone:delivery,tags.ref:company:HU,tags.addr:housename,tags.lunch:menu:price,tags.building:levels,tags.contact:messenger,tags.place,tags.billiards:pool,tags.billiards:snooker,tags.contact:skype,tags.payment:debit_cards:minimum,tags.kids_area,tags.tables,tags.toilets:changing_table,tags.breakfast,tags.fast_food,tags.stairs,tags.stars,tags.name:es,tags.loc_ref,tags.source:name,tags.gambling,tags.name:fi,tags.leisure,tags.sport_1,tags.payment:erzsebet,tags.payment:otp_cafeteria,tags.shelter_type,tags.waste,tags.books,tags.old_name2,tags.ref2:bkk,tags.lunch:menu:cuisine,tags.instagram,tags.artisan,tags.description:hu,tags.pre_order,tags.contact:twitter,tags.route_ref,tags.handrail:left,tags.handrail:right,tags.step_count,tags.source:date,tags.fire_hydrant:type,tags.lunch:buffet:cost,tags.transport,tags.second_hand,tags.addr:unit,tags.organic,tags.cash_in,tags.email:2,tags.live_music,tags.lunch:menu:cost,tags.happy_hours,tags.contact:email_1,tags.disused:website,tags.fee,tags.alt_name_1,tags.addr:state,tags.image,tags.toilets:access,tags.ethnicity,tags.colour,tags.ele,tags.atm:bitcoin,tags.currency:BTC,tags.currency:XBT,tags.payment:bitcoin,tags.lgbtq,tags.access,tags.real_ale,tags.website:hu,tags.phone:mobile,tags.addr:full,tags.biergarten,tags.changing_table,tags.payment:bank_card,tags.children,tags.outdoor_seating:wheelchair,tags.toilets:hot_water,tags.tv,tags.tv:description,tags.diet:gluten_free,tags.payment:szep:otp,tags.power_supply,tags.power_supply:voltage,tags.service_times,tags.tv:seasonal,tags.name:ar,tags.twitter,tags.description:en,tags.cocktails,tags.distillery,tags.FIXME,tags.opening_date,tags.source:www.magnetbank.hu:date,tags.beer_garden,tags.status,tags.ref:bkktelebusz,tags.name_old,tags.service:bicycle:rental,tags.service:bicycle:retail,tags.diet:sugar_free,tags.addr:conscriptionnumber,tags.barrier,tags.kerb,tags.wheelchair:description:hu,tags.old_name:2014-2019,tags.inscription,tags.service:bicycle:diy,tags.gay,tags.source:addr,tags.name:he,tags.strapline,tags.microbrewery,tags.payment:jcb,tags.currency:EUR,tags.old_brand,tags.contact:linkedin,tags.payment:cards,tags.drive_in,tags.source:operator,tags.description:covid19,tags.contact:website_1,tags.floor,tags.building:levels:aboveground,tags.building:levels:underground,tags.name:pl,tags.opening_hours:happy_hour,tags.ferry,tags.mooring,tags.happy_hours:food,tags.lunch:menu:main,tags.lunch:menu:email,tags.payment:erzsebet_plusz,tags.payment:szep:kh,tags.payment:szep:mkb,tags.building,tags.ope,tags.diet:raw,tags.lunch:menu:delivery,tags.diet:kosher,tags.office,tags.coffee,tags.access:description,tags.operational_status,tags.diet:lacto-ovo_vegetarian,tags.diet:lacto_vegetarian,tags.diet:milk_free,tags.diet:ovo_vegetarian,tags.diet:soy_free,tags.diet:specialty_coffee,tags.nat_name,tags.lunch:menu:course:sandwich,tags.noname,tags.theatre:type,tags.payment:paypass,tags.owncup_discount,tags.levels,tags.sport:pinball,tags.sport:table_soccer,tags.payment:card,tags.lunch:menu:course:drink,tags.note:hu,tags.operator:wikidata,tags.operator:wikipedia,tags.bar,tags.support,tags.currency,tags.payment,tags.addr:door,tags.addr:floor,tags.name:zh_pinyin,tags.payment:erzsebet_plus,tags.payment:otp_bankpont,tags.strapline:ru,tags.drink,tags.language:en,tags.language:hu,tags.local_name,tags.MCC,tags.MNC,tags.communication:mobile_phone,tags.gsm:LAC,tags.gsm:cellid,tags.location,tags.lte:LAC,tags.lte:cellid,tags.lte:eNB,tags.umts:LAC,tags.umts:PSC,tags.umts:RNC,tags.umts:cellid,tags.contact:url,tags.min_age,tags.payment:diners_club,tags.swimming_pool,tags.ISO3166-2,tags.admin_level,tags.boundary,tags.name:hr,tags.name:lt,tags.name:nl,tags.name:ro,tags.type,tags.language,tags.name:sr
0,node,26746770,47.507152,19.039409,yes,2.0,Batthyány tér,MÁV-HÉV,yes,stop_position,stop,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,node,30082680,47.469158,19.243233,,,Rákoshegy,MÁV,,stop_position,stop,958.0,yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


11193

In [14]:
overpass_gdf = gpd.GeoDataFrame(
    overpass_df, geometry=gpd.points_from_xy(overpass_df.lon, overpass_df.lat))
#overpass_gdf.head(3)

In [57]:
#gdf.to_excel("emptyvalue.xlsx")

### Drop not used columns

In [3]:
overpass_gdf = overpass_gdf.drop([
    'type', 'id', 'tags.ref:bkk', 'tags.shelter', 'tags.wheelchair', 'tags.bin', 'tags.departures_board', 'tags.lit', 'tags.network', 'tags.surface', 'tags.tactile_paving', 'tags.covered',
    'tags.internet_access', 'tags.mooring', 'tags.layer', 'tags.wikidata', 'tags.route_ref', 'tags.level', 'tags.ref:mav', 'tags.start_date', 'tags.wikipedia', 'tags.old_name', 'tags.highway', 'tags.drive_through', 'tags.addr:city', 'tags.addr:housenumber', 'tags.contact:phone', 'tags.contact:website', 'tags.opening_hours', 'tags.payment:american_express', 'tags.payment:cash', 'tags.payment:maestro',
    'tags.payment:mastercard', 'tags.payment:visa', 'tags.payment:visa_electron', 'tags.old_name:1948-1990', 'tags.capacity', 'tags.dog', 'tags.email', 'tags.facebook', 'tags.screen', 'tags.ref:vatin:hu', 'tags.branch', 'tags.brand:wikidata', 'tags.brand:wikipedia', 'tags.contact:email', 'tags.contact:facebook',
    'tags.contact:instagram', 'tags.contact:youtube', 'tags.operator:addr', 'tags.payment:debit_cards', 'tags.ref:vatin', 'tags.source:www.posta.hu:date', 'tags.created_by', 'tags.healthcare', 'tags.cuisine',
    'tags.name:it', 'tags.air_conditioning', 'tags.bic', 'tags.source:www.cib.hu:date', 'tags.outdoor_seating', 'tags.opening_hours:covid19', 'tags.source:www.budapestbank.hu:date', 'tags.takeaway', 'tags.self_service', 'tags.smoking', 'tags.chip_scanner', 'tags.diet:vegetarian', 'tags.diet:vegan', 'tags.lunch', 'tags.survey:date', 'tags.stop_date', 
    'tags.shop', 'tags.craft', 'tags.payment:credit_cards', 'tags.payment:contactless', 'tags.payment:mastercard_contactless', 'tags.payment:mastercard_electronic', 'tags.theatre:genre', 'tags.entrance', 'tags.disused:contact:website', 
    'tags.delivery', 'tags.is_in', 'tags.url', 'tags.contact:fax', 'tags.name:zh','tags.old_name:1990-2019', 'tags.old_name:1984-2020', 'tags.opening_hours:signed', 'tags.fax', 'tags.old_name:1949-1990',  'tags.internet_access:ssid', 'tags.lunch:buffet', 'tags.lunch:menu',
    'tags.lunch:menu:course:dessert', 'tags.lunch:menu:course:main', 'tags.lunch:menu:course:soup', 'tags.lunch:menu:url', 'tags.payment:szep', 'tags.source:addr:housenumber', 'tags.free_refill', 'tags.last_checked', 'tags.cash', 'tags.brewery',
    'tags.animal', 'tags.payment:visa_debit', 'tags.club', 'tags.owncup', 'tags.owncup:discount', 'tags.lunch:menu:choices', 'tags.payment:coins',  'tags.free_refill_note', 'tags.tourism', 
    'tags.check_date:opening_hours', 'tags.source:opening_hours', 'tags.dispensing',  'tags.website_1', 'tags.payment:meal_voucher', 'tags.payment:cryptocurrencies', 'tags.payment:electronic_purses', 'tags.sport', 
    'tags.opening_hours:kitchen', 'tags.background_music', 'tags.highchair', 'tags.contact:phone_1', 'tags.contact:phone_2', 'tags.contact:phone_3', 'tags.internet_access:ssid_1', 'tags.internet_access:ssid_2', 'tags.lunch:menu:subscription', 
    'tags.lunch:menu:takeaway', 'tags.website_2', 'tags.currency:huf', 'tags.architect', 'tags.contact:website:2', 'tags.public_transport:version', 'tags.dress_code', 'tags.opening_hours:url', 'tags.ref:ruian:addr', 'tags.old_name:-2013-09-01', 'tags.crossing', 'tags.addr:interpolation', 
    'tags.payment:notes', 'tags.payment:telephone_cards', 'tags.designation', 'tags.name:ko', 'tags.indoor', 'tags.lunch:menu:publish_time', 'tags.addr:place', 'tags.contact:phone:delivery', 'tags.addr:housename', 'tags.lunch:menu:price', 'tags.building:levels', 'tags.contact:messenger', 'tags.place', 'tags.billiards:pool', 'tags.billiards:snooker', 
    'tags.contact:google_plus', 'tags.contact:skype', 'tags.payment:debit_cards:minimum', 'tags.kids_area', 'tags.tables', 'tags.toilets:changing_table', 'tags.breakfast',  'tags.stairs', 'tags.stars', 'tags.name:es', 'tags.loc_ref', 'tags.gambling', 'tags.name:fi', 'tags.leisure', 'tags.sport_1', 'tags.payment:erzsebet', 
    'tags.payment:otp_cafeteria', 'tags.books', 'tags.old_name2', 'tags.lunch:menu:cuisine', 'tags.instagram', 'tags.artisan',  'tags.pre_order', 'tags.contact:twitter', 'tags.handrail:left',
    'tags.handrail:right', 'tags.step_count', 'tags.source:date', 'tags.fire_hydrant:type', 'tags.lunch:buffet:cost', 'tags.transport', 'tags.reservation', 'tags.second_hand', 'tags.addr:unit', 'tags.organic', 'tags.cash_in', 'tags.currency:HUF', 'tags.email:2', 'tags.live_music', 'tags.lunch:menu:cost', 'tags.happy_hours', 'tags.contact:email_1', 'tags.contact:foursquare', 
    'tags.disused:website', 'tags.fee', 'tags.alt_name_1', 'tags.addr:state', 'tags.image', 'tags.toilets:access', 'tags.ethnicity', 'tags.contact:mobile', 'tags.colour', 'tags.diet:lactose_free', 'tags.ele', 'tags.atm:bitcoin',
    'tags.currency:BTC', 'tags.currency:XBT', 'tags.payment:bitcoin', 'tags.check_date', 'tags.lgbtq', 'tags.access', 'tags.real_ale', 'tags.website:hu', 'tags.phone:mobile', 'tags.addr:full', 'tags.biergarten', 'tags.changing_table', 
    'tags.payment:bank_card', 'tags.children', 'tags.outdoor_seating:wheelchair', 'tags.toilets:hot_water', 'tags.tv', 'tags.diet:gluten_free', 'tags.payment:szep:otp', 'tags.power_supply', 'tags.power_supply:voltage', 
    'tags.service_times', 'tags.tv:seasonal', 'tags.name:ar', 'tags.twitter', 'tags.description:en', 'tags.cocktails', 'tags.distillery', 'tags.FIXME', 'tags.opening_date', 'tags.source:www.magnetbank.hu:date',  
    'tags.name_old', 'tags.service:bicycle:rental', 'tags.service:bicycle:retail', 'tags.diet:sugar_free', 'tags.addr:conscriptionnumber', 'tags.wheelchair:description:hu', 'tags.old_name:2014-2019', 'tags.service:bicycle:diy',
    'tags.gay', 'tags.source:addr', 'tags.name:he', 'tags.strapline', 'tags.microbrewery', 'tags.payment:jcb', 'tags.currency:EUR', 'tags.old_brand', 'tags.contact:linkedin', 'tags.payment:cards', 'tags.drive_in', 'tags.source:operator', 'tags.description:covid19',
    'tags.contact:website_1', 'tags.floor', 'tags.building:levels:aboveground',  'tags.name:pl', 'tags.opening_hours:happy_hour', 'tags.website2', 'tags.ferry', 'tags.happy_hours:food', 'tags.lunch:menu:main', 'tags.lunch:menu:email', 
    'tags.payment:erzsebet_plusz', 'tags.payment:szep:kh', 'tags.payment:szep:mkb',  'tags.ope', 'tags.diet:raw', 'tags.lunch:menu:delivery', 'tags.diet:kosher', 'tags.office', 'tags.access:description', 'tags.operational_status', 
    'tags.diet:dairy_free', 'tags.diet:lacto-ovo_vegetarian', 'tags.diet:lacto_vegetarian', 'tags.diet:milk_free', 'tags.diet:ovo_vegetarian', 'tags.diet:soy_free', 'tags.diet:specialty_coffee', 'tags.nat_name', 'tags.lunch:menu:course:sandwich', 'tags.noname', 'tags.theatre:type', 'tags.payment:paypass', 'tags.owncup_discount', 'tags.levels', 'tags.sport:pinball', 
    'tags.sport:table_soccer', 'tags.payment:card', 'tags.lunch:menu:course:drink', 'tags.note:hu', 'tags.operator:wikidata',  'tags.operator:wikipedia', 'tags.bar', 'tags.support', 'tags.currency', 'tags.payment', 'tags.addr:door', 'tags.addr:floor',
    'tags.name:zh_pinyin', 'tags.payment:erzsebet_plus', 'tags.payment:otp_bankpont', 'tags.strapline:ru', 'tags.language:en',  'tags.local_name', 'tags.MCC', 'tags.MNC', 'tags.communication:mobile_phone', 
    'tags.gsm:LAC', 'tags.gsm:cellid', 'tags.location', 'tags.lte:LAC', 'tags.lte:cellid', 'tags.lte:eNB', 'tags.umts:LAC', 'tags.local_ref', 'tags.passenger_information_display', 'tags.website', 'tags.bench', 'tags.stopColorType', 'tags.addr:postcode', 'tags.addr:street',
    'tags.phone', 'tags.name:de', 'tags.name:en', 'tags.wheelchair:description', 'tags.toilets:wheelchair', 'tags.internet_access:fee', 'tags.name:fr', 'tags.name:ru', 'tags.name:ka', 'tags.noplatform', 'tags.addr:subdistrict', 'tags.addr:suburb', 'tags.old_postal_code', 'tags.old_name_2', 'tags.accuracy', 'tags.mapillary', 'tags.shelter_type', 'tags.name:lt', 'tags.name:nl', 'tags.name:ro',
    'tags.ISO3166-2', 'tags.admin_level', 'tags.boundary', 'tags.name:hr', 'tags.fixme', 'tags.barrier', 'tags.kerb', 'tags.building:levels:underground', 'tags.language:hu', 'tags.umts:PSC', 'tags.umts:RNC', 'tags.umts:cellid', 'tags.contact:url', 'tags.min_age', 'tags.payment:diners_club', 'tags.swimming_pool', 'tags.ref', 'tags.line', 'tags.lines_1', 'tags.waste_basket', 'tags.source',
    'tags.int_name', 'tags.railway:ref', 'tags.tv:description', 'tags.status', 'tags.building', 'tags.type', 'tags.language', 'tags.alt_name', 'tags.note', 'tags.addr:country', 'tags.description', 'tags.name:hu', 'tags.ref:HU:company', 'tags.bus_routes', 'tags.ref:bkk2', 'tags.alt_name:ru',
    'tags.abandoned:amenity', 'tags.ref:company:HU', 'tags.lines', 'tags.food', 'tags.old_alt_name', 'tags.amenity_1', 'tags.name:sr', 'tags.coffee', 'tags.cafe', 'tags.drink', 'tags.wifi', 'tags.toilets', 'tags.old_name_1', 'tags.beer_garden', 'tags.ref:bkktelebusz',
    'tags.description:hu', 'tags.waste', 'tags.ref:bkk_1', 'tags.ref2:bkk', 'tags.atm', 'tags.disused:amenity', 'tags.fast_food', 'tags.source:name', 'tags.ref:MAV', 'tags.official_name', 'tags.operator', 'tags.brand', 'tags.verified', 'tags.short_name', 'tags.lift', 'tags.inscription' ]
, axis=1, errors='ignore')
#print(overpass_df.columns.tolist())
#print(overpass_df['tags_official_name'].unique().tolist())

#overpass_df.tags_operator.value_counts()
#overpass_df.tags_brand.value_counts()
#overpass_df.tags_verified.value_counts() #Fontos, ezekből 3766 esetben nem, 176 esetben verified!!! A maradék sokezerről meg pláne fogalmunk sincs.
#overpass_df.tags_short_name.value_counts()

In [4]:
overpass_gdf.head()

Unnamed: 0,lat,lon,tags.light_rail,tags.name,tags.public_transport,tags.railway,tags.train,tags.amenity,tags.bus,tags.tram,tags.trolleybus,tags.subway,geometry
0,47.507152,19.039409,yes,Batthyány tér,stop_position,stop,,,,,,,POINT (19.03941 47.50715)
1,47.469158,19.243233,,Rákoshegy,stop_position,stop,yes,,,,,,POINT (19.24323 47.46916)
2,47.525015,19.227709,,Kisherceg vendéglő,,,,restaurant,,,,,POINT (19.22771 47.52502)
3,47.566302,19.04736,,Aquincum,stop_position,stop,yes,,,,,,POINT (19.04736 47.56630)
4,47.598553,19.055045,yes,Békásmegyer,stop_position,stop,,,,,,,POINT (19.05504 47.59855)


### Replace "." to "_" in column names

In [21]:
overpass_gdf.columns = overpass_gdf.columns.str.replace(".", "_")
#overpass_gdf.head(2)

  overpass_gdf.columns = overpass_gdf.columns.str.replace(".", "_")


### Oszloptartalom konvertálása dummysitáshoz

In [29]:
overpass_gdf.tags_light_rail.value_counts()
overpass_gdf.tags_public_transport.value_counts()
overpass_gdf.tags_railway.value_counts()
overpass_gdf.tags_train.value_counts()
overpass_gdf.tags_bus.value_counts()
overpass_gdf.tags_tram.value_counts()
overpass_gdf.tags_trolleybus.value_counts()
overpass_gdf.tags_subway.value_counts()

yes    80
Name: tags_light_rail, dtype: int64

platform         4377
stop_position    1093
Name: tags_public_transport, dtype: int64

tram_stop    675
stop         285
halt           8
platform       5
station        2
Name: tags_railway, dtype: int64

yes    172
Name: tags_train, dtype: int64

yes    3968
no        2
Name: tags_bus, dtype: int64

yes    656
Name: tags_tram, dtype: int64

yes    115
Name: tags_trolleybus, dtype: int64

yes    91
no      1
Name: tags_subway, dtype: int64

In [6]:
#Replace all above names of overpass types to 'yes', for simplicity
overpass_gdf = overpass_gdf.replace(['tram_stop', 'stop', 'platform', 'stop_position', 'halt', 'station', 'yes'], 1)
#Replac all 'no' values to NaN
overpass_gdf = overpass_gdf.replace('no', np.nan)
#overpass_gdf.head(3)

In [7]:
overpass_gdf.tags_amenity.value_counts()

restaurant      1413
fast_food        861
cafe             783
pub              679
atm              493
pharmacy         357
bank             328
bar              218
post_office      129
theatre           45
cinema            20
bus_station        2
shelter            2
waste_basket       1
Name: tags_amenity, dtype: int64

#### One hot encoding amenity types

In [8]:
amenity_dummies = pd.get_dummies(overpass_gdf.tags_amenity)
overpass_gdf = pd.concat([overpass_gdf, amenity_dummies], axis=1)

#drop amenity, as it is one hot encoded
overpass_gdf = overpass_gdf.drop(['tags_amenity'] , axis=1)

In [10]:
overpass_gdf.head(2)

Unnamed: 0,lat,lon,tags_light_rail,tags_name,tags_public_transport,tags_railway,tags_train,tags_bus,tags_tram,tags_trolleybus,tags_subway,geometry,atm,bank,bar,bus_station,cafe,cinema,fast_food,pharmacy,post_office,pub,restaurant,shelter,theatre,waste_basket
0,47.507152,19.039409,1.0,Batthyány tér,1.0,1.0,,,,,,POINT (19.03941 47.50715),0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,47.469158,19.243233,,Rákoshegy,1.0,1.0,1.0,,,,,POINT (19.24323 47.46916),0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [11]:
# Convert all columns to float64
#overpass_df.dtypes
cols = overpass_gdf.columns.drop(['geometry', 'tags_name'])
overpass_gdf[cols] = overpass_gdf[cols].astype('float64')

In [12]:
overpass_gdf = overpass_gdf.drop(['shelter', 'waste_basket', 'bus_station', 'tags_public_transport'], axis = 1)
#overpass_gdf.head(2)
#overpass_gdf.dtypes

In [36]:
# Empty geometry objektumok eldobása
#overpass_gdf.drop(overpass_gdf.tail(3).index,inplace=True)
overpass_gdf = overpass_gdf[~overpass_gdf.geometry.is_empty]
#overpass_gdf.tail()

In [40]:
len(overpass_gdf)

11190