# Collecting POI data of a city

In [1]:
import overpy
import pandas as pd

overpy is to overpass API 

### Define a dictionary to store fetched POI data

In [5]:
data = {"id":[] , "category":[], "sub_category":[], "lat":[], "lon":[] }

### Define some functions to automate the fetching POI data from OverpassAPI

In [6]:
def add_data(category, sub_category, ways):
    for way in ways:
        data["id"].append(way.id)
        data["category"].append(category)
        data["sub_category"].append(sub_category)
        data["lat"].append(float(way.center_lat))
        data["lon"].append(float(way.center_lon))

###   Helper function to create query string

In [7]:
def get_query_string(key, value=None, city="Delhi"):
    if value is None:
        query_string = f"""
        [out:json];
        area[name="{city}"];
        way["{key}"](area);
        out center;    
    """
    else:
        query_string = f"""
            [out:json];
            area[name="{city}"];
            way["{key}"="{value}"](area);
            out center;    
        """
        
    return query_string     

### Custom Function to call OverpassAPI
    

In [8]:
def api_call(key, category="", value_list= None, value=None, city="Delhi"):
    api = overpy.Overpass()
    
    if value_list is not None:
        for val in value_list:
            query_string = get_query_string(key=key, value=val, city=city)
            result = api.query(query_string)
            add_data(category, val, result.ways)
    elif value is not None:
        get_query_string(key, value, city)
        result = api.query(query_string)
        add_data(category, key, result.ways)
    else:
        query_string = get_query_string(key=key, city=city)
        result = api.query(query_string)
        add_data(category, key, result.ways)

### Fetch all POIs related to their category

In [9]:
# Sustenance
sustenance = ["bar", "bbq", "cafe", "biergarten", "fast_food", "food_court", "pub", "restaurant", "ice_cream"]
api_call(key="amenity", category="sustenance", value_list=sustenance)
len(data["id"])

31

In [10]:
# Education
education = ["college", "school", "university", "library", "language_school", "kindergarten"]
api_call(key="amenity", category="education", value_list=education)
len(data["id"])

736

In [11]:
# Entertainment
entertainment = ["arts_centre", "casino", "cinema", "communtity_centre", "gambling", "nightclub", "planetarium", "theater"]
api_call(key="amenity", category="entertainment", value_list=entertainment)
len(data["id"])

758

In [12]:
# Healthcare
healthcare = ["hospital", "clinic", "nursing_home", "pharmacy", "veterinary", "doctors", "dentist"]
api_call(key="amenity", category="healthcare", value_list=healthcare)
len(data["id"])

868

In [13]:
# Accomodation
accomodation = ["apartments", "bunglow", "dormitory", "house", "residential"]
api_call(key="building", category="accomodation", value_list=accomodation)
len(data["id"])

3161

In [16]:
# Commercial
commercial = ["commercial", "industrial","office", "retail", "supermarket"]
api_call(key="building", category="commercial", value_list=commercial)
len(data["id"])

4011

In [20]:
# Religious
religious = ["church", "mosque", "temple"]
api_call(key="building", category="religious", value_list=religious)
len(data["id"])

4055

In [21]:
# Agriculture
agriculture = ["barn", "conservatory", "cowshed", "farm_auxiliary", "greenhouse", "stable", "sty"]
api_call(key="building", category="agriculture", value_list=agriculture)
len(data["id"])

4064

In [22]:
# Historic
api_call(key="historic", category="historic")
len(data["id"])

4199

In [23]:
# Tourism
api_call(key="tourism", category="tourism")
len(data["id"])

4353

### Creating dataframe to store all POI data

In [24]:
df = pd.DataFrame(data)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4353 entries, 0 to 4352
Data columns (total 5 columns):
id              4353 non-null int64
category        4353 non-null object
sub_category    4353 non-null object
lat             4353 non-null float64
lon             4353 non-null float64
dtypes: float64(2), int64(1), object(2)
memory usage: 170.1+ KB


In [25]:
df.head()

Unnamed: 0,id,category,sub_category,lat,lon
0,352253821,sustenance,bar,28.665502,77.302875
1,154110068,sustenance,cafe,28.59487,77.018324
2,252334020,sustenance,cafe,28.559042,77.283511
3,311711702,sustenance,cafe,37.430555,-120.777042
4,340381516,sustenance,cafe,28.549631,77.167901


In [26]:
df.shape

(4353, 5)

### Removing duplicates rows based on their IDs

In [27]:
df.drop_duplicates(subset ="id", 
                     keep = False, inplace = True) 
df.shape

(3562, 5)

### Save the collected POI data as a CSV file

In [29]:
df.to_csv("Delhi_geo_data.csv", index=False)