# Scrape the list of villages in Ranchi, get their respective latitude and longitude, and store them in the CSV file 'ranchi_villages.csv'.

In [2]:
# Using beautiful soup to scrape ranchi blocks
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

## Step 1: Scrape list of villages in Ranchi from web.

In [None]:
def get_soup_object(url, timeout=30):
    """Download ``url`` and parse the response body with the lxml parser.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so that a
            single stalled request cannot hang the whole scrape.

    Returns:
        A ``BeautifulSoup`` object built from the response text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of parsing the error page as if it were data.
    response.raise_for_status()
    return BeautifulSoup(response.text, 'lxml')

In [None]:
# URL of the vlist.in district page (district 364) used as the Ranchi listing
rnc_data_url = 'http://vlist.in/district/364.html'
# download and parse the district page once; the resulting soup is read by the scraping cell below
soup = get_soup_object(rnc_data_url)
print('Soup object created')

In [None]:
# site root that extract_row prepends to the href values found in the listing tables
village_url_header = 'http://vlist.in'
# district label written into every scraped record
district_name = 'Ranchi'

In [None]:
# Extracts one row of a listing table: returns the link and the name held in the row's second cell.
def extract_row(table_row):
    """Return ``(link, name)`` for a single ``<tr>`` element of a listing table.

    Args:
        table_row: A parsed ``<tr>`` element whose second ``<td>`` contains an
            ``<a>`` tag.

    Returns:
        A tuple ``(link, name)`` where ``link`` is ``village_url_header``
        followed by the anchor's ``href`` and ``name`` is the text of the
        second cell.

    Raises:
        IndexError: If the row has fewer than two ``<td>`` cells.
        TypeError: If the second cell contains no ``<a>`` tag.
    """
    cells = table_row.find_all('td')
    # Only the second cell is needed: it carries both the anchor and the display name.
    name_cell = cells[1]
    link = village_url_header + name_cell.find('a')['href']
    return link, name_cell.text

In [None]:
# Collect the block rows of the district page; the first two <tr> rows are skipped.
table_rows = soup.find_all('tr')[2:]
data = []
# Visit every block page and record each village listed on it.
for table_row in table_rows:
    sub_district_link, block_name = extract_row(table_row)
    print(block_name)
    # Fetch the block's own page, which lists its villages.
    soup_village = get_soup_object(sub_district_link)
    # All rows of the village table except the first one.
    sub_table_rows = soup_village.find_all('tr')[1:]
    # Store one [village, block, district] record per village row.
    for sub_table_row in sub_table_rows:
        sub_link, village_name = extract_row(sub_table_row)
        data.append([village_name, block_name, district_name])

print(data[0])

## Step 2: Store scraped data in a data frame

In [None]:
# build a data frame from the scraped [village, block, district] records
# (the CSV itself is written later, in Step 4)
header = ['Village','Block','District']
df = pd.DataFrame(data= data, columns= header)

In [None]:
# preview the first rows of the scraped table
df.head()

## Step 3: For every row (village item) in the data frame, get its latitude and longitude and append them to the data frame

In [3]:
# use geocoder library, if not present use !conda install -c conda-forge geocoder
import geocoder
# Google API key is required for the geocoder library to work, save the API key in OS environment variables as GOOGLE_API_KEY
# and then access that key here
import os
# Use BING_API_KEY when choosing to use bing geocoding instead of google geocoding.
# The key is read from the BING_API_KEY environment variable so that the secret is
# never stored in the notebook itself.
BING_API_KEY = os.environ.get('BING_API_KEY')
if not BING_API_KEY:
    # Stop here with a clear message rather than failing later inside the geocoding calls.
    raise RuntimeError('Set the BING_API_KEY environment variable before running the geocoding cells.')

In [4]:
# This function will take an address and return the latlng of that address
def get_latlng(address):
    """Geocode ``address`` with the Bing provider of the geocoder library.

    Args:
        address: Free-text address string to look up.

    Returns:
        A two-element ``pandas.Series``. It holds the values of the result's
        ``latlng`` attribute when the lookup produced one, and ``[NaN, NaN]``
        otherwise.
    """
    g = geocoder.bing(address, key=BING_API_KEY)
    coordinates = g.latlng
    if not coordinates:
        # A failed lookup yields an empty/None latlng. Returning an explicit NaN
        # pair keeps every result two elements wide, so the two-column
        # assignment still works even when no address could be resolved.
        return pd.Series([float('nan'), float('nan')])
    return pd.Series(coordinates)

In [None]:
# Geocode "<Village>, <Block>, <District>" for every row and store the two
# returned values in the Latitude and Longitude columns.
df[['Latitude','Longitude']] = df.apply(
    lambda row: get_latlng(', '.join((row.Village, row.Block, row.District))),
    axis=1,
)
df.head()

In [None]:
# print the column summary of the data frame after geocoding
df.info()

In [None]:
# drop, in place, every row that has a missing value in any column
# (e.g. villages for which no coordinates were obtained), then print the summary again
df.dropna(inplace= True)
df.info()

## Step 4: Store the data in a csv

In [None]:
# write the data frame to 'ranchi_villages.csv' (relative path, so it lands in the current working directory)
df.to_csv('ranchi_villages.csv')

## Visualize the villages on a map.

In [None]:
#!conda install -c conda-forge folium --yes # uncomment this line if folium is missing
import folium

In [5]:
# coordinates used as the initial centre of the folium map
rnc_latitude = 23.3441
rnc_longitude = 85.3096

In [None]:
# create map of Ranchi centred on the latitude and longitude values defined above
map_ranchi = folium.Map(location=[rnc_latitude, rnc_longitude], zoom_start=10)

# only rows without missing values are plotted
data = df.dropna()

# add one small circle marker per village, with a "<village>, <block>" popup
marker_rows = zip(data['Latitude'], data['Longitude'], data['Village'], data['Block'])
for lat, lng, village, block in marker_rows:
    label = folium.Popup('{}, {}'.format(village, block), parse_html=True)
    # NOTE(review): parse_html is forwarded to CircleMarker exactly as before — confirm that it has any effect there.
    marker = folium.CircleMarker(
        [lat, lng],
        radius=1.3,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_ranchi)

# last expression of the cell, so the notebook renders the map
map_ranchi