### 1. Import libraries

In [1]:
!pip install geocoder
!pip install folium

import numpy as np # library to handle data in a vectorized amanner

import pandas as pd # library for data analsysis
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import geocoder # to get coordinates

import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML and XML documents

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

print("Libraries imported.")

Libraries imported.


### 2. Scrap data from Wikipedia page into a DataFrame

In [2]:
data = requests.get("https://en.wikipedia.org/wiki/Category:Neighbourhoods_in_Tehran").text

In [3]:
soup = BeautifulSoup(data, 'html.parser')

In [4]:
neighborhoodList = []

In [5]:
for row in soup.find_all("div", class_="mw-category")[0].findAll("li"):
    neighborhoodList.append(row.text)

In [6]:
teh_df = pd.DataFrame({"Neighborhood": neighborhoodList})

teh_df.head()

Unnamed: 0,Neighborhood
0,Abbas Abad (Tehran)
1,Afsariyeh
2,Aghdasieh
3,Ajudanieh
4,Amir Abad


In [7]:
teh_df.shape

(76, 1)

### 3. Get the geographical coordinates

In [8]:
# define a function to get coordinates
def get_latlng(neighborhood):
    # initialize your variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Tehran, IRAN'.format(neighborhood))
        lat_lng_coords = g.latlng
    return lat_lng_coords

In [10]:
# call the function to get the coordinates, store in a new list using list comprehension
coords = [ get_latlng(neighborhood) for neighborhood in teh_df["Neighborhood"].tolist() ]

In [11]:
coords

[[35.733400000000074, 51.434480000000065],
 [35.654130000000066, 51.49326000000008],
 [35.694390000000055, 51.42151000000007],
 [35.79860000000008, 51.47870000000006],
 [35.262500000000045, 51.70694000000003],
 [35.694390000000055, 51.42151000000007],
 [35.74720000000008, 51.32600000000008],
 [35.694390000000055, 51.42151000000007],
 [29.962300000000027, 57.25350000000003],
 [35.81830000000008, 51.491400000000056],
 [35.80780000000004, 51.38250000000005],
 [35.81340000000006, 51.43020000000007],
 [35.694390000000055, 51.42151000000007],
 [35.77170000000007, 51.456900000000076],
 [35.490500000000054, 51.06430000000006],
 [29.963500000000067, 56.42480000000006],
 [35.694390000000055, 51.42151000000007],
 [35.694390000000055, 51.42151000000007],
 [35.79000000000008, 51.46360000000004],
 [35.79150000000004, 51.428300000000036],
 [35.793900000000065, 51.393500000000074],
 [35.79030000000006, 51.338700000000074],
 [35.796200000000056, 51.46710000000007],
 [34.51580000000007, 49.2022000000000

In [12]:
# create temporary dataframe to populate the coordinates into Latitude and Longitude
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])

In [13]:
# merge the coordinates into the original dataframe
teh_df['Latitude'] = df_coords['Latitude']
teh_df['Longitude'] = df_coords['Longitude']

In [14]:
# check the neighborhoods and the coordinates
print(teh_df.shape)
teh_df

(76, 3)


Unnamed: 0,Neighborhood,Latitude,Longitude
0,Abbas Abad (Tehran),35.7334,51.43448
1,Afsariyeh,35.65413,51.49326
2,Aghdasieh,35.69439,51.42151
3,Ajudanieh,35.7986,51.4787
4,Amir Abad,35.2625,51.70694
5,Apadana Residential Complex,35.69439,51.42151
6,Bagh Feiz,35.7472,51.326
7,"Bahar, Tehran",35.69439,51.42151
8,Baharestan (district),29.9623,57.2535
9,"Darabad, Tehran",35.8183,51.4914


In [15]:
# save the DataFrame as CSV file
teh_df.to_csv("teh_df.csv", index=False)

### 4. Create a map of Kuala Lumpur with neighborhoods superimposed on top

In [16]:
# get the coordinates of Kuala Lumpur
address = 'Tehran, IRAN'

geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Tehran, IRAN {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Tehran, IRAN 35.7006177, 51.4013785.


In [17]:
# create map of Toronto using latitude and longitude values
map_teh = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, neighborhood in zip(teh_df['Latitude'], teh_df['Longitude'], teh_df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_teh)  
    
map_teh

In [19]:
# save the map as HTML file
map_teh.save('map_teh.html')