# Supermarkets using Overpass API

In [21]:
import requests
import pandas as pd

In [22]:
# Define the Overpass API URL
overpass_url = "http://overpass-api.de/api/interpreter"

In [23]:
# Overpass API query to find all supermarkets in Victoria
overpass_query = """
[out:json];
area["name"="Victoria"]->.searchArea;
node["shop"="supermarket"](area.searchArea);
out body;
"""

In [24]:
# Make the API request
response = requests.get(overpass_url, params={'data': overpass_query})
data = response.json()

In [25]:
response

<Response [200]>

In [26]:
data

{'version': 0.6,
 'generator': 'Overpass API 0.7.62.1 084b4234',
 'osm3s': {'timestamp_osm_base': '2024-09-15T14:18:00Z',
  'timestamp_areas_base': '2024-09-15T08:49:43Z',
  'copyright': 'The data included in this document is from www.openstreetmap.org. The data is made available under ODbL.'},
 'elements': [{'type': 'node',
   'id': 32193447,
   'lat': -37.2773622,
   'lon': 144.7338613,
   'tags': {'name': 'IGA', 'shop': 'supermarket'}},
  {'type': 'node',
   'id': 218028470,
   'lat': -37.7914336,
   'lon': 145.1718422,
   'tags': {'addr:housenumber': '55',
    'addr:street': 'Tunstall Square',
    'brand': 'Coles',
    'brand:wikidata': 'Q1108172',
    'brand:wikipedia': 'en:Coles Supermarkets',
    'name': 'Coles',
    'opening_hours': '07:00-24:00',
    'operator': 'Coles Group',
    'operator:wikidata': 'Q1339055',
    'postal_code': '3109',
    'shop': 'supermarket',
    'website': 'https://www.coles.com.au/'}},
  {'type': 'node',
   'id': 222420863,
   'lat': -37.7889872,
   '

In [27]:
names = []
latitudes = []
longitudes = []
suburbs = []
postcodes = []
addresses = []

# Store supermarket names and locations as a list
for element in data['elements']:
    # Store names of supermarket
    name = element['tags'].get('name', 'Unnamed Supermarket')
    names.append(name)
    
    # Store coordinates 
    latitudes.append(element['lat'])
    longitudes.append(element['lon'])
    
    # Extract suburb (if available)
    suburb = element['tags'].get('addr:suburb', element['tags'].get('addr:city', 'No Suburb'))
    suburbs.append(suburb)
    
    # Store postcode 
    postcode = element['tags'].get('addr:postcode', 'No Postcode')
    postcodes.append(postcode)
    
    # Store address by combining house number and street
    house_number = element['tags'].get('addr:housenumber', '')
    street = element['tags'].get('addr:street', '')
    address = f"{house_number} {street}".strip()  
    addresses.append(address)

In [28]:
# Convert lists of supermarkets into a dataframe
supermarkets_df = pd.DataFrame({
    'Supermarket': names,
    'Latitude': latitudes,
    'Longitude': longitudes,
    'Suburb': suburbs,
    'Postcode': postcodes,
    'Address': addresses
})

supermarkets_df[supermarkets_df['Suburb']!='No Suburb']

Unnamed: 0,Supermarket,Latitude,Longitude,Suburb,Postcode,Address
9,Coles,-37.794199,145.279174,Croydon,3136,5-15 Kent Avenue
11,IGA,-36.750813,145.571003,Euroa,3666,75 Binney Street
16,Pepper's Foods,48.460952,-123.296921,Victoria,V8N 4G1,3829 Cadboro Bay Road
24,Woolworths,-37.822934,145.302038,Bayswater North,3153,17-39 Canterbury Road
25,Woolworths,-37.803235,145.310815,Kilysth,3137,534-542 Mount Dandenong Road
...,...,...,...,...,...,...
973,Coles,-37.878491,145.092334,Ashwood,No Postcode,695 Warrigal Road
976,Hong Asian Grocery Shop,-35.919706,145.649380,Cobram,3644,39 High Street
988,Foodworks,-37.936011,145.061230,East Bentleigh,3165,293 Chesterville Road
999,IGA,-37.885727,145.058607,Carnegie,3163,18-30 Woorayl Street


In [29]:
# Check entries with suburb names listed.
supermarkets_df[supermarkets_df['Suburb']!='No Suburb']

Unnamed: 0,Supermarket,Latitude,Longitude,Suburb,Postcode,Address
9,Coles,-37.794199,145.279174,Croydon,3136,5-15 Kent Avenue
11,IGA,-36.750813,145.571003,Euroa,3666,75 Binney Street
16,Pepper's Foods,48.460952,-123.296921,Victoria,V8N 4G1,3829 Cadboro Bay Road
24,Woolworths,-37.822934,145.302038,Bayswater North,3153,17-39 Canterbury Road
25,Woolworths,-37.803235,145.310815,Kilysth,3137,534-542 Mount Dandenong Road
...,...,...,...,...,...,...
973,Coles,-37.878491,145.092334,Ashwood,No Postcode,695 Warrigal Road
976,Hong Asian Grocery Shop,-35.919706,145.649380,Cobram,3644,39 High Street
988,Foodworks,-37.936011,145.061230,East Bentleigh,3165,293 Chesterville Road
999,IGA,-37.885727,145.058607,Carnegie,3163,18-30 Woorayl Street


* We see that Supermarket Pepper's Foods was labelled as Victoria Suburb with postcode V8N 4G1, how should we approach this?
* Only 174 out of 1012 entries had a suburb name labelled. Should we remove the suburb column?

In [30]:
# total supermarkets in Victoria
len(supermarkets_df)

1012

In [31]:
# save the DataFrame to a CSV file
supermarkets_df.to_csv('../data/curated/preprocessed_supermarkets.csv', index=False)