In [6]:
import overpy
import pandas as pd
from sklearn.neighbors import BallTree
import numpy as np

In [11]:
# Connect to OSM API
api = overpy.Overpass()

# Pick the tag to looking for. Unit of timeout is seconds, adjust it higher if timing out.
query = """
[out:json][timeout:600];
(
  node["landuse"="landfill"];
  way["landuse"="landfill"];
  relation["landuse"="landfill"];
);
out body;
>;
out skel qt;
"""

result = api.query(query)

# Prepare data for DataFrame and pull what information might be attached
data = []
for way in result.ways:
    for node in way.nodes:
        data.append([node.lat, 
                     node.lon,
                     way.tags.get("name", "n/a"), 
                     way.tags.get("operator", "n/a"),
                     way.tags.get("waste", "n/a"),
                     way.tags.get("capacity", "n/a"),
                     way.tags.get("description", "n/a"),
                     way.tags.get("ref", "n/a")])

# Create DataFrame
landfills = pd.DataFrame(data, columns=["Lat", "Lon", "Name", "Operator", "Waste", "Capacity", "Description", "Ref"])

# Print DataFrame
print(landfills)

# Save it
#landfills.to_csv('C:/Users/hughr/OneDrive/Documents/RMI/OSM/tag_landfill.csv', index=False)

               Lat          Lon            Name Operator Waste Capacity  \
0       51.5399790   -0.7504159  Hindhay Quarry     Lanz   n/a      n/a   
1       51.5399729   -0.7500734  Hindhay Quarry     Lanz   n/a      n/a   
2       51.5398795   -0.7492687  Hindhay Quarry     Lanz   n/a      n/a   
3       51.5398328   -0.7489898  Hindhay Quarry     Lanz   n/a      n/a   
4       51.5390644   -0.7472855  Hindhay Quarry     Lanz   n/a      n/a   
...            ...          ...             ...      ...   ...      ...   
969318  45.2141097  147.8649379             n/a      n/a   n/a      n/a   
969319  45.2143380  147.8648091             n/a      n/a   n/a      n/a   
969320  45.2145572  147.8646718             n/a      n/a   n/a      n/a   
969321  45.2146366  147.8642491             n/a      n/a   n/a      n/a   
969322  45.2147341  147.8641107             n/a      n/a   n/a      n/a   

       Description  Ref  
0              n/a  n/a  
1              n/a  n/a  
2              n/a  n

In [4]:
api = overpy.Overpass()

query = """
[out:json];
(
  node["amenity"="waste_disposal"];
  way["amenity"="waste_disposal"];
  relation["amenity"="waste_disposal"];
);
out body;
>;
out skel qt;
"""

result = api.query(query)

# Prepare data for DataFrame
data = []
for way in result.ways:
    for node in way.nodes:
        data.append([node.lat, 
                     node.lon,
                     way.tags.get("name", "n/a"), 
                     way.tags.get("operator", "n/a"),
                     way.tags.get("waste", "n/a"),
                     way.tags.get("capacity", "n/a"),
                     way.tags.get("description", "n/a"),
                     way.tags.get("ref", "n/a")])

# Create DataFrame
disposal = pd.DataFrame(data, columns=["Lat", "Lon", "Name", "Operator", "Waste", "Capacity", "Description", "Ref"])

# Print DataFrame
print(disposal)

# Save it
#disposal.to_csv('C:/Users/hughr/OneDrive/Documents/RMI/OSM/tag_waste_disposal.csv', index=False)

               Lat         Lon Name Operator Waste Capacity Description  Ref
0       53.0566721   8.9085563   44      n/a   n/a      n/a         n/a  n/a
1       53.0574235   8.9085186   44      n/a   n/a      n/a         n/a  n/a
2       53.0574191   8.9082740   44      n/a   n/a      n/a         n/a  n/a
3       53.0575710   8.9082664   44      n/a   n/a      n/a         n/a  n/a
4       53.0575799   8.9087554   44      n/a   n/a      n/a         n/a  n/a
...            ...         ...  ...      ...   ...      ...         ...  ...
103320  54.9792971  83.0537673  n/a      n/a   n/a      n/a         n/a  n/a
103321  54.9792971  83.0537673  n/a      n/a   n/a      n/a         n/a  n/a
103322  54.9793052  83.0538075  n/a      n/a   n/a      n/a         n/a  n/a
103323  54.9791985  83.0538723  n/a      n/a   n/a      n/a         n/a  n/a
103324  54.9791850  83.0538048  n/a      n/a   n/a      n/a         n/a  n/a

[103325 rows x 8 columns]


In [7]:
api = overpy.Overpass()

query = """
[out:json][timeout:1200];
(
  node["amenity"="recycling"];
  way["amenity"="recycling"];
  relation["amenity"="recycling"];
);
out body;
>;
out skel qt;
"""

result = api.query(query)

# Prepare data for DataFrame
data = []
for way in result.ways:
    for node in way.nodes:
        data.append([node.lat, 
                     node.lon,
                     way.tags.get("name", "n/a"), 
                     way.tags.get("operator", "n/a"),
                     way.tags.get("waste", "n/a"),
                     way.tags.get("capacity", "n/a"),
                     way.tags.get("description", "n/a"),
                     way.tags.get("ref", "n/a")])

# Create DataFrame
recycling = pd.DataFrame(data, columns=["Lat", "Lon", "Name", "Operator", "Waste", "Capacity", "Description", "Ref"])

# Print DataFrame
print(recycling)

# Save it
#recycling.to_csv('C:/Users/hughr/OneDrive/Documents/RMI/OSM/tag_waste_disposal.csv', index=False)

               Lat          Lon                                         Name  \
0       51.9891980   -0.2004669  Letchworth Household Waste Recycling Centre   
1       51.9897390   -0.2009866  Letchworth Household Waste Recycling Centre   
2       51.9899329   -0.2006622  Letchworth Household Waste Recycling Centre   
3       51.9895095   -0.1998233  Letchworth Household Waste Recycling Centre   
4       51.9891980   -0.2004669  Letchworth Household Waste Recycling Centre   
...            ...          ...                                          ...   
232788  34.5108898  135.7581733                                          n/a   
232789  34.5105097  135.7580338                                          n/a   
232790  34.5105826  135.7574880                                          n/a   
232791  34.5108191  135.7572801                                          n/a   
232792  34.5108444  135.7573175                                          n/a   

       Operator Waste Capacity Descript

In [None]:
for c in waste_disposal.columns:
    print(waste_disposal[c].unique().shape)

In [34]:
# Convert Lat/Lon to radians for use in haversine formula
df_rad = np.deg2rad(waste_disposal[['Lat', 'Lon']])

# Construct BallTree
tree = BallTree(df_rad, metric='haversine')

# Initialize array to keep track of whether row should be kept
keep = np.ones(waste_disposal.shape[0], dtype=bool)

# Convert min_distance (km) to radians
# Earth's radius is approximately 6371 km
min_distance = 5  # distance in km
min_distance_rad = min_distance / 6371

# Iterate over each point
for i, row in enumerate(df_rad.values):
    if keep[i]:  # if this point hasn't been marked for removal
        # Find all points within min_distance of this point
        inds = tree.query_radius([row], r=min_distance_rad)[0]
        
        # Mark all *other* points as not to keep
        inds = inds[inds != i]  # remove self from list
        keep[inds] = False

# Filter DataFrame
df_filtered = waste_disposal[keep]

In [35]:
df_filtered.shape

(3859, 8)

In [13]:
# Code to drop points that are near each other

#landfills = pd.read_csv('C:/Users/hughr/OneDrive/Documents/RMI/OSM/tag_landfill.csv')

# Convert Lat/Lon to radians for use in haversine formula
df_rad = np.deg2rad(landfills[['Lat', 'Lon']].astype(np.float64))

# Construct BallTree
tree = BallTree(df_rad, metric='haversine')

# Initialize array to keep track of whether row should be kept
keep = np.ones(landfills.shape[0], dtype=bool)

# Convert min_distance (km) to radians
# Earth's radius is approximately 6371 km
min_distance = 10  # distance in km
min_distance_rad = min_distance / 6371

# Iterate over each point
for i, row in enumerate(df_rad.values):
    if keep[i]:  # if this point hasn't been marked for removal
        # Find all points within min_distance of this point
        inds = tree.query_radius([row], r=min_distance_rad)[0]
        
        # Mark all *other* points as not to keep
        inds = inds[inds != i]  # remove self from list
        keep[inds] = False

# Filter DataFrame
landfills_filtered = landfills[keep]
print(landfills.shape)
print(landfills_filtered.shape)

(969323, 8)
(18430, 8)


In [14]:
landfills[['Lat', 'Lon']].astype(np.float64)

Unnamed: 0,Lat,Lon
0,51.539979,-0.750416
1,51.539973,-0.750073
2,51.539879,-0.749269
3,51.539833,-0.748990
4,51.539064,-0.747286
...,...,...
969318,45.214110,147.864938
969319,45.214338,147.864809
969320,45.214557,147.864672
969321,45.214637,147.864249


In [15]:
landfills[['Lat', 'Lon']]

Unnamed: 0,Lat,Lon
0,51.5399790,-0.7504159
1,51.5399729,-0.7500734
2,51.5398795,-0.7492687
3,51.5398328,-0.7489898
4,51.5390644,-0.7472855
...,...,...
969318,45.2141097,147.8649379
969319,45.2143380,147.8648091
969320,45.2145572,147.8646718
969321,45.2146366,147.8642491


In [None]:
# Big one usually times out, don't use this one

# import pandas as pd
# import overpy

# api = overpy.Overpass()

# query = """
# [out:json];
# (
#   node["amenity"="waste_disposal"];
#   way["amenity"="waste_disposal"];
#   relation["amenity"="waste_disposal"];

#   node["amenity"="recycling"];
#   way["amenity"="recycling"];
#   relation["amenity"="recycling"];
  
#   node["amenity"="recycling_type_centre"];
#   way["amenity"="recycling_type_centre"];
#   relation["amenity"="recycling_type_centre"];

#   node["landuse"="landfill"];
#   way["landuse"="landfill"];
#   relation["landuse"="landfill"];
  
#   node["landuse"="waste_site"];
#   way["landuse"="waste_site"];
#   relation["landuse"="waste_site"];
  
#   node["amenity"="waste_dumps"];
#   way["amenity"="waste_dumps"];
#   relation["amenity"="waste_dumps"];
  
#   node["amenity"="waste_processing"];
#   way["amenity"="waste_processing"];
#   relation["amenity"="waste_processing"];
  
#   node["man_made"="works"]["industry"="waste"];
#   way["man_made"="works"]["industry"="waste"];
#   relation["man_made"="works"]["industry"="waste"];
  
# );
# out body;
# >;
# out skel qt;
# """

# result = api.query(query)

# # Prepare data for DataFrame
# data = []
# for way in result.ways:
#     for node in way.nodes:
#         data.append([way.tags.get("name", "n/a"), way.tags.get("highway", "n/a"), node.lat, node.lon])

# # Create DataFrame
# df = pd.DataFrame(data, columns=["Name", "Highway", "Lat", "Lon"])

# # Print DataFrame
# print(df)