### Imports

In [0]:
import pandas as pd
import numpy as np
import folium
from bs4 import BeautifulSoup
import requests
# !pip install pgeocode
# import pgeocode
from geopy.geocoders import Nominatim

### Getting the data

In [2]:
html_doc = requests.get('https://finkode.com/mh/mumbai.html')
soup = BeautifulSoup(html_doc.content, 'html.parser')
pcode_table = soup.find('table',attrs={'class':'plist'})
pcode_table = pcode_table.find_all('tr')
del(pcode_table[0])
head_columns = ['Area','Pincode']
df = pd.DataFrame(columns=head_columns)
for tr in pcode_table:
  temp_data = tr.find_all('td')
  df = df.append({'Pincode':temp_data[2].text,'Area':temp_data[0].text}, ignore_index=True)
df.head()

Unnamed: 0,Area,Pincode
0,A I Staff Colony S.O,400029
1,Aareymilk Colony S.O,400065
2,Agripada S.O,400011
3,Ambewadi S.O (Mumbai),400004
4,Andheri East S.O,400069


### Adding geo coordinates to the dataframe

In [9]:
geolocator = Nominatim(user_agent="in_explorer")
for index,pincode in enumerate(df['Pincode']):
  code = geolocator.geocode('Mumbai - '+pincode)
  if code == None:
    lat = lon = 'NA'
  else:
    lat = code.latitude
    lon = code.longitude
  df.loc[index,'Latitude'] = lat
  df.loc[index,'Longitude'] = lon
  print(lat,lon)
df.head()

19.15162244816581 72.85498103682772
19.180136292367155 72.90881206880297
18.9387711 72.8353355
18.9387711 72.8353355
19.130181654802666 72.8530796168885
19.144419550000002 72.82661210819862
19.12573235 72.84267477586994
18.9387711 72.8353355
19.037527767647283 72.92814570708681
18.9387711 72.8353355
19.144419550000002 72.82661210819862
18.9387711 72.8353355
19.06377 72.849661
18.9387711 72.8353355
18.9387711 72.8353355
19.06377 72.849661
19.1515749 72.84069279056939
NA NA
19.1192271 72.92180253333333
18.9387711 72.8353355
19.091985700000002 72.91677167312055
18.9387711 72.8353355
19.172896496576104 72.93493041411237
19.1303562 72.9380041
19.1502055 72.9386169
19.1502055 72.9386169
18.9658877 72.81479742587862
18.9387711 72.8353355
18.9387711 72.8353355
19.229007442724786 72.86207247076649
19.23014206960739 72.83760735114791
19.2061608 72.7852955
18.9387711 72.8353355
18.9387711 72.8353355
18.9387711 72.8353355
19.1217752 72.8637528
18.9387711 72.8353355
19.20772265 72.8373895553008
18.

Unnamed: 0,Area,Pincode,Latitude,Longitude
0,A I Staff Colony S.O,400029,19.1516,72.855
1,Aareymilk Colony S.O,400065,19.1801,72.9088
2,Agripada S.O,400011,18.9388,72.8353
3,Ambewadi S.O (Mumbai),400004,18.9388,72.8353
4,Andheri East S.O,400069,19.1302,72.8531


### Visualizing data on map

In [0]:
df = df[df['Latitude']!='NA']

Now we successfully removed areas for which api was unable to fetch coordinates.

In [26]:
df.drop_duplicates(['Latitude','Longitude'],inplace=True)
df.reset_index(drop=True,inplace=True)
df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,Area,Pincode,Latitude,Longitude
0,A I Staff Colony S.O,400029,19.1516,72.855
1,Aareymilk Colony S.O,400065,19.1801,72.9088
2,Agripada S.O,400011,18.9388,72.8353
3,Andheri East S.O,400069,19.1302,72.8531
4,Andheri H.O,400053,19.1444,72.8266
5,Andheri Railway Station S.O,400058,19.1257,72.8427
6,Anushakti Nagar S.O,400094,19.0375,72.9281
7,B.N. Bhavan S.O,400051,19.0638,72.8497
8,Bangur Nagar S.O,400104,19.1516,72.8407
9,Barve Nagar S.O,400084,19.1192,72.9218


After cleaning data we find that only 54 pincodes are correctly located.

In [27]:
address = 'Kalina, Mumbai'
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Mumbai are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Mumbai are 19.079273, 72.8612672.


In [28]:
mum_map = folium.Map(location=[latitude,longitude],zoom_start=11)

for lat,lng,label in zip(df['Latitude'],df['Longitude'],df['Area']):
  label = folium.Popup(label, parse_html=True)
  folium.CircleMarker(
      [lat,lng],
      radius=5,
      popup=label,
      color='blue',
      fill=True,
      fill_color='#3186cc',
      fill_opacity=0.7,
      parse_html=False
  ).add_to(mum_map)

mum_map