# Auditing Cairo's OSM Data

In [1]:
from audit import XMLAuditor, myprint, clean_arabic_street

In [2]:
# Create the auditor for the Cairo OSM extract. The path is relative, so the
# notebook must be run from the directory that contains `data/`.
audit = XMLAuditor('data/cairo_egypt.osm')

In [3]:
# Counting top-level elements (XPath './').
# The printed pairs are (element name, count); the recorded output also
# carries an aggregate 'total' row alongside the per-name counts.
myprint(audit.count('./').items())

node 327582
total 372880
relation 133
bounds 1
way 45164



In [4]:
# Counting second-level elements (XPath './*/'), i.e. the children of the
# top-level elements. As in the recorded output, an aggregate 'total' row is
# printed together with the per-name counts.
myprint(audit.count('./*/').items())

member 684
nd 402980
total 529054
tag 125390



### Way Elements

In [18]:
# All way tag keys, most frequent first (ties broken alphabetically by key).
# The result is deliberately not truncated: every key is listed, and the
# aggregate 'total' row is printed along with them.
# The sort key indexes the (key, count) pair instead of unpacking it in the
# lambda signature, which is a syntax error on Python 3.
myprint(sorted(
            audit.count('./way/tag', mapto=lambda t: t.get('k')).items(),
            key=lambda kv: (-kv[1], kv[0])))

total 102396
highway 32235
name 10091
oneway 9634
name:en 8462
name:ar 7598
building 7126
alt_name 5594
landuse 1716
source 1439
note 1268
addr:street 1031
bridge 986
amenity 977
layer 914
leisure 913
waterway 801
surface 739
addr:housenumber 645
addr:city 625
created_by 583
junction 526
addr:country 492
ref 434
maxspeed 405
lanes 402
service 394
barrier 369
access 346
tunnel 339
historic 305
foot 302
power 252
addr:postcode 246
railway 227
voltage 223
religion 215
man_made 201
cables 181
aeroway 171
sport 166
natural 161
parking 112
operator 105
boat 93
area 92
building:levels 85
shop 85
denomination 81
tourism 81
name:fr 73
addr:housename 70
alt_name:en 67
lit 66
bicycle 65
int_name 65
wheelchair 62
addr:interpolation 61
military 54
wikipedia 52
embankment 50
name:de 50
name:es 50
horse 46
int_ref 45
place 44
website 44
motor_vehicle 41
capacity 34
fee 33
location 32
substation 30
office 28
electrified 25
name:ru 25
product 25
gauge 23
phone 21
emergency 20
atm 16
toll 16
name:pl 14


### Node Elements

In [6]:
# Top 5 node tag keys by frequency (ties broken alphabetically by key).
# Six entries are sliced because the aggregate 'total' row is part of the
# sorted list and occupies one slot.
# The sort key indexes the (key, count) pair instead of unpacking it in the
# lambda signature, which is a syntax error on Python 3.
myprint(sorted(
            audit.count('./node/tag', mapto=lambda t: t.get('k')).items(),
            key=lambda kv: (-kv[1], kv[0]))[:6])

total 22051
power 4847
name 2751
name:en 2022
name:ar 1725
amenity 1381



<b>There are quite a few named nodes. There are fewer German than Arabic names, and fewer Arabic names than English ones. However, address tags are not even in the top 15.</b>

In [7]:
# Top 5 values of the English name tag (name:en) on nodes, most frequent
# first. Six entries are sliced because the aggregate 'total' row occupies
# one slot.
# The sort key indexes the (value, count) pair instead of unpacking it in the
# lambda signature, which is a syntax error on Python 3.
myprint(sorted(
            audit.count('./node/tag[@k="name:en"]',
                        mapto=lambda t: t.get('v')).items(),
            key=lambda kv: (-kv[1], kv[0]))[:6])

total 2022
Mobil 16
Misr 12
KFC 8
Metro 7
Shell 7



In [8]:
# Nodes that carry an Arabic name (name:ar) but no English name (name:en).
# The matching nodes are keyed by their name:ar value, and the cell reports
# len(...) - 1; the subtraction presumably discounts the aggregate 'total'
# entry that count() reports. NOTE(review): this is the number of *distinct*
# Arabic names, which equals the number of nodes only if no two such nodes
# share a name - confirm against XMLAuditor.count.
len(audit.count(
        './node/tag/..',
        mapto=lambda node: node.find('./tag[@k="name:ar"]').get('v'),
        cond=lambda node: (node.find('./tag[@k="name:ar"]') is not None
                           and node.find('./tag[@k="name:en"]') is None))) - 1

19

In [9]:
# Nodes that carry a German name (name:de) but no English name (name:en).
# The matching nodes are keyed by their name:de value, and the cell reports
# len(...) - 1; the subtraction presumably discounts the aggregate 'total'
# entry that count() reports. NOTE(review): this is the number of *distinct*
# German names, which equals the number of nodes only if no two such nodes
# share a name - confirm against XMLAuditor.count.
len(audit.count(
        './node/tag/..',
        mapto=lambda node: node.find('./tag[@k="name:de"]').get('v'),
        cond=lambda node: (node.find('./tag[@k="name:de"]') is not None
                           and node.find('./tag[@k="name:en"]') is None))) - 1

1

<b>English-named nodes are pretty much a superset of the German- and Arabic-named ones.</b>

In [10]:
# Top 10 values of the node tag k="power", most frequent first.
# Eleven entries are sliced because the aggregate 'total' row occupies one
# slot (the same convention as the other "top N" cells); with [:10] only nine
# real values could ever be shown.
# The sort key indexes the (value, count) pair instead of unpacking it in the
# lambda signature, which is a syntax error on Python 3.
myprint(sorted(
            audit.count('./node/tag[@k="power"]',
                        mapto=lambda t: t.get('v')).items(),
            key=lambda kv: (-kv[1], kv[0]))[:11])

total 4847
tower 4741
pole 106



In [11]:
# Top 10 address tag keys on nodes, i.e. keys that start with 'addr', most
# frequent first. Eleven entries are sliced because the aggregate 'total'
# row occupies one slot.
# The sort key indexes the (key, count) pair instead of unpacking it in the
# lambda signature, which is a syntax error on Python 3.
myprint(sorted(
            audit.count('./node/tag',
                        mapto=lambda t: t.get('k'),
                        cond=lambda t: t.get('k').startswith('addr')).items(),
            key=lambda kv: (-kv[1], kv[0]))[:11])

total 628
addr:housenumber 228
addr:street 169
addr:city 123
addr:housename 55
addr:postcode 38
addr:country 6
addr:place 2
addr:suburb 2
addr:full 1
addr:interpolation 1



<b>So where is the address information? It seems the address tags are not used as expected: people record node names and way names rather than addresses.</b>

In [12]:
# Values of addr:country on nodes, most frequent first (at most 10 entries,
# one of which is the aggregate 'total' row).
# The sort key indexes the (value, count) pair instead of unpacking it in the
# lambda signature, which is a syntax error on Python 3.
myprint(sorted(
            audit.count('./node/tag[@k="addr:country"]',
                        mapto=lambda t: t.get('v')).items(),
            key=lambda kv: (-kv[1], kv[0]))[:10])

EG 6
total 6



In [13]:
# Most common values of addr:city on nodes, most frequent first.
# Only the first 10 entries are shown, and one of them is the aggregate
# 'total' row, so rarer spellings are cut off.
# The sort key indexes the (value, count) pair instead of unpacking it in the
# lambda signature, which is a syntax error on Python 3.
myprint(sorted(
            audit.count('./node/tag[@k="addr:city"]',
                        mapto=lambda t: t.get('v')).items(),
            key=lambda kv: (-kv[1], kv[0]))[:10])

total 123
مدينة 6 أكتوبر 52
Cairo 32
giza 5
الجيزة 4
Giza 3
cairo 3
Gizeh 2
New Cairo 2
القاهرة 2



In [14]:
# All values of addr:street on nodes, most frequent first (ties broken
# alphabetically). Each raw value is stripped of surrounding whitespace and
# passed through clean_arabic_street before counting, so the counts are per
# cleaned value.
# The sort key indexes the (value, count) pair instead of unpacking it in the
# lambda signature, which is a syntax error on Python 3.
myprint(sorted(
            audit.count('./node/tag[@k="addr:street"]',
                        mapto=lambda t: clean_arabic_street(t.get('v').strip())).items(),
            key=lambda kv: (-kv[1], kv[0])))

total 169
حصرى 14
مركزى 9
مركزى طريق النصر 8
هرم 8
Katameya Heights 5
No 4 5
Road 9 5
Ibrahim Baher Zaghloul 4
Gaewad Hosni 3
دولسى 3
El Hadaek Street 2
Ismail Mohamed Street 2
No 3 2
Salah Salem 2
تحرير 2
ماجدة 2
مبارك 2
واحات 2
1 Sheraton buildings, Area 1, Heliopolis 1
2 Soliman Abaza St.Mohandeseen, Giza 1
205 1
233 1
24 شارع صالح خلف مسجد نصر الدين 1
256 1
26 يوليو 1
26th of July Street 1
26th of July Street (entrance on Al-Aziz Osman street) 1
277 1
51 Khedr El Touny St. 1
59-60 Marioutiya Canal, Shara Kerdessa,  Cairo, Egypt 1
9 1
Abou El Feda Street 1
Ahmed Fakhry Street 1
Al Aaded Street 1
Al Mehwar Al Markazi 1
Al Nahda Street 1
Al Orouba Street 1
Al-Haram 1
Al-Remaya Square 1
Al-Sayed Al-Bakry 1
Brazil Street 1
Dreamland, El Wahat Road 1
El Muez St. 1
Heliopolis 1
Ibn El Nabih 1
Ismail Mohamed Street, Zamalek 1
Joseph Tito Street 1
Kamal Al-Din Salah 1
Kasr El Sham 1
Katamey Heights 1
Lofti Hassuna 1
Makram Ebeed 1
Mohamed Atawia Street 1
Mohamed Ibn Thakib Street 1
Mohamed 

In [15]:
# All values of addr:housename on nodes, most frequent first (ties broken
# alphabetically by value).
# The sort key indexes the (value, count) pair instead of unpacking it in the
# lambda signature, which is a syntax error on Python 3.
myprint(sorted(
            audit.count('./node/tag[@k="addr:housename"]',
                        mapto=lambda t: t.get('v')).items(),
            key=lambda kv: (-kv[1], kv[0])))

total 55
5od 1
AUC Hostel 1
Cock Door 1
DJ Foola 1
Domino's Pizza دومينوز يتزا 1
El Haram 1
HH Import & Export and Trade Agency 1
MUST Opera House 1
Metro مترو ماركت 1
National Bank 1
Native cafe 1
New Horizon 1
Villa 60 1
al qahira for housing and reconstruction 1
el qahira for housing and reconstruction dwellings 1
أبو على العجلاتى 1
الفادى ماركت 1
القدس للأتصالات 1
المالكى 1
المصطفى 1
بازار الجامعة 1
برعى 2000 للأدوات المكتبية والهدايا 1
بقالة الحرمين 1
بنك التعمير والأسكان 1
بيتزاهت 1
جمعية تنمية وتطوير الصادرات البستانية 1
جمعية رسالة للأعمال الخيرية 1
حدايد وبويات محمود 1
حسنى الكبابجى 1
خبراء للتنمية والأستثمار العقارى 1
د.عواطف شاهير 1
د.نيفين 1
دارى للبناء والكيماويات 1
دجاج كنتاكى 1
دريم لأصلاح الدش 1
سارة (أخوان البطه) سابقاً 1
سينما جولف سيتى 1
صحارى 1
صيدلية د.محمد عبد الرازق 1
صيدلية د.منال 1
صيدلية د/اسلام 1
صيدلية دكتور أحمد زكى 1
طارق للمشويات 1
عصير قصب البركة 1
قهوة الصعيدى 1
كارفور العبور 1
كشك أبو جاد 1
كشك علوان 1
كلية الطب القصر العيني 1
ماركت الشيمى 1
ماكدونلدز 