In [1]:
import osmium
import csv

In [33]:
import osmium
import shapely.wkb as wkblib
from shapely.geometry import shape

class PharmacyHandler(osmium.SimpleHandler):
    def __init__(self):
        super(PharmacyHandler, self).__init__()
        self.pharmacies = []
        self.wkbfab = osmium.geom.WKBFactory()

    def node(self, n):
        if self.is_pharmacy(n.tags):
            pharmacy_data = self.extract_pharmacy_data(n)
            self.pharmacies.append(pharmacy_data)

    def way(self, w):
        if self.is_pharmacy(w.tags):
            centroid = self.calculate_centroid(w)
            pharmacy_data = self.extract_pharmacy_data(w, centroid)
            self.pharmacies.append(pharmacy_data)

    def relation(self, r):
        if self.is_pharmacy(r.tags):
            centroid = self.calculate_centroid(r)
            pharmacy_data = self.extract_pharmacy_data(r, centroid)
            self.pharmacies.append(pharmacy_data)

    def is_pharmacy(self, tags):
        # List of known pharmacy chain names
        pharmacy_chains = [
            'Farmacias Similares',
            'Farmacias Guadalajara',
            'Farmacias del Ahorro',
            'Farmacias Benavides',
            'Farmacia San Pablo',
            # Add more chains as needed
        ]

        if ('amenity' in tags and tags['amenity'] == 'pharmacy') or \
           ('shop' in tags and tags['shop'] == 'chemist') or \
           ('healthcare' in tags and tags['healthcare'] == 'pharmacy') or \
           ('dispensing' in tags and tags['dispensing'] == 'yes'):
            return True

        # Check if 'farmacia' or 'pharmacy' is in the name
        if 'name' in tags:
            name_lower = tags['name'].lower()
            if 'farmacia' in name_lower or 'pharmacy' in name_lower:
                return True
            # Check if the name matches a known pharmacy chain
            for chain in pharmacy_chains:
                if chain.lower() in name_lower:
                    return True
        return False

    def calculate_centroid(self, element):
        try:
            if isinstance(element, osmium.osm.Way):
                if element.is_closed():
                    wkb = self.wkbfab.create_polygon(element)
                else:
                    wkb = self.wkbfab.create_linestring(element)
            elif isinstance(element, osmium.osm.Relation):
                wkb = self.wkbfab.create_multipolygon(element)
            else:
                return (None, None)

            geom = wkblib.loads(wkb, hex=True)
            centroid = geom.centroid
            return (centroid.y, centroid.x)
        except Exception as e:
            print(f"Error calculating centroid for element {element.id}: {e}")
            return (None, None)

    def extract_pharmacy_data(self, element, centroid=None):
        tags = dict(element.tags)
        pharmacy_data = {
            'id': element.id,
            'type': element.__class__.__name__.lower(),  # 'node', 'way', or 'relation'
            'latitude': centroid[0] if centroid else getattr(element.location, 'lat', None),
            'longitude': centroid[1] if centroid else getattr(element.location, 'lon', None),
            'name': tags.get('name'),
            'street': tags.get('addr:street'),
            'housenumber': tags.get('addr:housenumber'),
            'city': tags.get('addr:city'),
            'postcode': tags.get('addr:postcode'),
            'state': tags.get('addr:state'),
            'country': tags.get('addr:country'),
            'phone': tags.get('phone') or tags.get('contact:phone'),
            'email': tags.get('email') or tags.get('contact:email'),
            'website': tags.get('website') or tags.get('contact:website'),
            'opening_hours': tags.get('opening_hours'),
            'operator': tags.get('operator'),
            'brand': tags.get('brand'),
            'brand_wikidata': tags.get('brand:wikidata'),
            'brand_wikipedia': tags.get('brand:wikipedia'),
            'wheelchair': tags.get('wheelchair'),
            'drive_through': tags.get('drive_through'),
            'dispensing': tags.get('dispensing'),
            'osm_version': element.version,
            'timestamp': element.timestamp.isoformat() if element.timestamp else None,
            'changeset': element.changeset,
            'user': element.user,
            'uid': element.uid,
            # Include all tags if needed
            # 'all_tags': tags
        }
        return pharmacy_data


In [34]:
handler = PharmacyHandler()

In [35]:
handler.apply_file("mexico-latest.osm.pbf")

Error calculating centroid for element 121465445: 'osmium.geom.WKBFactory' object has no attribute 'create_polygon'
Error calculating centroid for element 128573541: 'osmium.geom.WKBFactory' object has no attribute 'create_polygon'
Error calculating centroid for element 150577761: 'osmium.geom.WKBFactory' object has no attribute 'create_polygon'
Error calculating centroid for element 156348839: 'osmium.geom.WKBFactory' object has no attribute 'create_polygon'
Error calculating centroid for element 157966182: 'osmium.geom.WKBFactory' object has no attribute 'create_polygon'
Error calculating centroid for element 158626897: 'osmium.geom.WKBFactory' object has no attribute 'create_polygon'
Error calculating centroid for element 160188408: 'osmium.geom.WKBFactory' object has no attribute 'create_polygon'
Error calculating centroid for element 177530547: 'osmium.geom.WKBFactory' object has no attribute 'create_polygon'
Error calculating centroid for element 180337494: 'osmium.geom.WKBFactor

In [36]:
pharmacies = handler.pharmacies

# Optional: Save to a CSV file
fieldnames = [
    'id', 'type', 'latitude', 'longitude', 'name', 'street', 'housenumber', 'city', 'postcode',
    'state', 'country', 'phone', 'email', 'website', 'opening_hours', 'operator', 'brand',
    'brand_wikidata', 'brand_wikipedia', 'wheelchair', 'drive_through', 'dispensing',
    'osm_version', 'timestamp', 'changeset', 'user', 'uid'
]

with open('pharmacies_mexico.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for pharmacy in pharmacies:
        writer.writerow(pharmacy)

print(f"Extracted {len(pharmacies)} pharmacies.")

Extracted 5939 pharmacies.


In [17]:
len(pharmacies)

5523

In [18]:
import pandas as pd
import numpy as np

In [19]:
df = pd.read_csv('pharmacies_mexico.csv')

In [20]:
df.head()

Unnamed: 0,id,type,latitude,longitude,name,street,housenumber,city,postcode,state,...,brand_wikidata,brand_wikipedia,wheelchair,drive_through,dispensing,osm_version,timestamp,changeset,user,uid
0,280308494,node,20.604881,-105.233711,Farmacia Guadalajara,,,,,,...,Q5435609,,,,,6,2024-05-17T15:40:48+00:00,0,,0
1,293542771,node,25.667318,-100.312021,Benavides,,,,,,...,Q5435613,,,,,8,2023-03-30T07:16:48+00:00,0,,0
2,309251520,node,16.730159,-92.640593,Farmacias del Ahorro,,,,,,...,Q62086647,,,,,3,2022-05-20T21:41:56+00:00,0,,0
3,332364266,node,19.344371,-99.188942,Farmacias del Ahorro,,,,,,...,Q62086647,,,,,3,2022-07-12T01:18:14+00:00,0,,0
4,337877608,node,17.640843,-101.555545,Farmapronto,,,,,,...,,,,,yes,2,2013-01-07T03:37:46+00:00,0,,0


In [22]:
df.columns

Index(['id', 'type', 'latitude', 'longitude', 'name', 'street', 'housenumber',
       'city', 'postcode', 'state', 'country', 'phone', 'email', 'website',
       'opening_hours', 'operator', 'brand', 'brand_wikidata',
       'brand_wikipedia', 'wheelchair', 'drive_through', 'dispensing',
       'osm_version', 'timestamp', 'changeset', 'user', 'uid'],
      dtype='object')

In [23]:
df['brand']

0       Farmacia Guadalajara
1                  Benavides
2       Farmacias del Ahorro
3       Farmacias del Ahorro
4                        NaN
                ...         
5518    Farmacia Guadalajara
5519    Farmacia Guadalajara
5520               Benavides
5521                     NaN
5522    Farmacia Guadalajara
Name: brand, Length: 5523, dtype: object

In [24]:
df['brand'].value_counts()

brand
Farmacia Guadalajara        626
Farmacias Similares         523
Farmacias del Ahorro        429
Benavides                   233
Farmacia Benavides           43
Farmacias Yza                34
Farmatodo                    28
Farmacia Similares           24
Farmacias Unión              12
Farmacias del Dr. Simi       10
Farmahorro                    2
Farmacias YZA                 1
Farmacias Ahumada             1
Farmacia GI                   1
Benu                          1
Farmacias Especializadas      1
Farmacia San Pablo            1
Name: count, dtype: int64

In [28]:
df['name'].value_counts().head(30)

name
Farmacia Guadalajara        754
Farmacias Similares         633
Farmacias del Ahorro        519
Benavides                   263
Farmacias Guadalajara        78
Farmacia Similares           65
Farmacia Benavides           63
Farmacias Yza                52
Similares                    39
Farmacia San Pablo           33
Farmacia del Ahorro          32
Guadalajara                  31
Farmatodo                    30
Farmacias Benavides          27
Farmapronto                  26
Farmacia                     25
San Pablo                    22
Farmacias Especializadas     20
Farmacias GI                 16
Farmacias YZA                15
Farmacia Yza                 13
Farmacias Unión              13
Farmacia GI                  12
Farmacias Sufacen            11
Farmacia Roma                11
Del Ahorro                   10
Farmacon                     10
Farmacias similares          10
Yza                          10
Farmacias del Dr. Simi       10
Name: count, dtype: int64