In [22]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC 
from selenium.webdriver.support.ui import WebDriverWait
import os,time,re,math
from selenium.common.exceptions import StaleElementReferenceException

In [23]:
# Pre-processed data kept in sync with the current backend system.
# Maps the district label (used below to pick the matching <option> of the
# district drop-down by its text) to a numeric id.
# NOTE(review): the ids are presumably the backend's district keys -- confirm.
using_district_list = {
    "ALIPURDUAR": 719,
    "BANKURA": 720,
    "BIRBHUM": 721,
    "COOCHBEHAR": 722,
    "DAKSHIN 24 PARGANA": 740,
    "DAKSHIN DINAJPUR": 723,
    "DARJEELING": 724,
    "HOOGHLY": 725,
    "HOWRAH": 726,
    "JALPAIGURI": 727,
    "JHARGRAM": 728,
    "KALIMPONG": 729,
    "KOLKATA METROPOLITAN AREA": 730,
    "MALDA": 731,
    "MURSHIDABAD": 732,
    "NADIA": 733,
    "PASCHIM BARDHAMAN": 735,
    "PASCHIM MEDINIPUR": 736,
    "PURBA BARDHHAMAN": 737,
    "PURBA MEDINIPUR": 738,
    "PURULIA": 739,
    "UTTAR 24 PARGANA": 734,
    "UTTAR DINAJPUR": 741
}

# Hospital categories offered by the page's radio-button group.
#   webkey: 1-based position of the radio label; passed to select_hospital_type.
#   dbkey : not used in this notebook -- presumably the backend's category key (TODO confirm).
using_hospital_type={
    "Government Hospital": {
        "dbkey": "govt",
        "webkey": 1
    },
    "Govt. Requisitioned Pvt. Hospital": {
        "dbkey": "pvtundergovt",
        "webkey": 2
    },
    "Private Hospital": {
        "dbkey": "private",
        "webkey": 3
    }
}

# Config
# Filesystem path of the chromedriver binary and the URL of the page to scrape.
PATH_TO_DRIVER = r'/usr/bin/chromedriver'
endpoint = 'https://excise.wb.gov.in/CHMS/Public/Page/CHMS_Public_Hospital_Bed_Availability.aspx'


# Chrome start-up options; uncomment the lines below to run without a visible window.
chrome_options = Options()
# chrome_options.add_argument("--headless")
# chrome_options.add_argument("--window-size=1920x1080")


#Functions

#Init
def init_scrape(chrome_options,endpoint,PATH_TO_DRIVER=r'/usr/bin/chromedriver',timeout=30):
    """Start Chrome, open ``endpoint`` and wait until the district drop-down is usable.

    Args:
        chrome_options: ``Options`` instance handed to ``webdriver.Chrome``.
        endpoint: URL of the page to open.
        PATH_TO_DRIVER: Path of the chromedriver executable.
        timeout: Seconds the returned ``WebDriverWait`` waits before timing out
            (default 30, the value that used to be hard-coded).

    Returns:
        Tuple ``(driver, wait)``: the live WebDriver and a ``WebDriverWait``
        bound to it.

    Raises:
        Whatever the page load or the initial wait raises; the browser is
        closed first so a failed start does not leave a Chrome process behind.
    """
    driver = webdriver.Chrome(executable_path = PATH_TO_DRIVER, options=chrome_options)
    try:
        driver.get(endpoint)
        wait = WebDriverWait(driver,timeout)
        wait.until(EC.element_to_be_clickable((By.ID,"ctl00_ContentPlaceHolder1_ddl_District")))
    except BaseException:
        # Nothing is returned on failure, so the caller could never quit the
        # browser itself -- release it here, then re-raise unchanged.
        driver.quit()
        raise
    return driver,wait

# Track the loading spinner
def still_loading(driver,wait):
    """Return the CSS ``display`` value of the page's progress element.

    NOTE: a second ``still_loading`` is defined further down in this cell and
    replaces this one once the cell has run; the two differ only in the
    sentinel returned on failure ("done" here).

    Args:
        driver: WebDriver used for the lookup.
        wait: Unused; kept so existing call sites keep working.

    Returns:
        The ``display`` property of element ``ctl00_ContentPlaceHolder1_upPgo``
        (callers treat "none" as "finished loading"), or "done" when the
        lookup fails.
    """
    try:
        progress = driver.find_element_by_xpath('//*[@id="ctl00_ContentPlaceHolder1_upPgo"]')
        return progress.value_of_css_property("display")
    except Exception:
        # Exception rather than a bare except: a kernel interrupt must not be
        # reported as a loading state.
        return "done"
    
    
# Select Radio Buttons
def select_hospital_type(driver,label):
    """Click the hospital-type radio button at position ``label``.

    Args:
        driver: WebDriver showing the bed-availability page.
        label: 1-based index of the ``<label>`` inside the
            ``ctl00_ContentPlaceHolder1_rdo_Govt_Flag`` group.

    Raises:
        Exception: "ERROR : Can't Select Hospital Type" when the lookup or the
            click fails; the underlying error is attached as ``__cause__``.
    """
    try:
        radio = driver.find_element_by_xpath(
            "//*[@id='ctl00_ContentPlaceHolder1_rdo_Govt_Flag']/label[{}]".format(label)
        )
        radio.click()
        # Not a pause: this sets the driver's sticky implicit-wait timeout for
        # later element lookups.
        driver.implicitly_wait(10)
    except Exception as exc:
        # Narrowed from a bare except so interrupts are not converted, and
        # chained so the root cause stays visible in the traceback.
        raise Exception("ERROR : Can't Select Hospital Type") from exc
        
        
# Click on <DISTRICT_NAME> district and load the pages
def select_district_and_wait_until_load(driver,wait,district_name):
    """Pick ``district_name`` in the district drop-down and wait for the result grid.

    Args:
        driver: WebDriver showing the bed-availability page.
        wait: ``WebDriverWait`` bound to ``driver``; its timeout bounds both waits.
        district_name: Exact text of the ``<option>`` to click.

    Raises:
        Exception: "ERROR : Can't select district" when the option cannot be
            clicked or the page does not finish loading within the wait's
            timeout; the underlying error is attached as ``__cause__``.
    """
    try:
        driver.find_element_by_xpath("//select[@name='ctl00$ContentPlaceHolder1$ddl_District']/option[text()='{}']".format(district_name)).click()
        # implicitly_wait() only changes a timeout setting, it never pauses, so
        # the former `while ...: implicitly_wait(2)` loop spun without sleeping
        # and without an upper bound. Poll through the explicit wait instead.
        wait.until(lambda d: still_loading(d,wait) == "none")
        # Keep the sticky implicit-wait timeout the original left behind.
        driver.implicitly_wait(5)
        wait.until(EC.element_to_be_clickable((By.ID,"ctl00_ContentPlaceHolder1_GridView2")))
    except Exception as exc:
        raise Exception("ERROR : Can't select district") from exc
        

# Get no of pages (Pagination) for selected district
def no_of_pages_for_selected_district_and_type(driver):
    """Count the pagination cells of the result grid.

    Args:
        driver: WebDriver showing the result grid ``ctl00_ContentPlaceHolder1_GridView2``.

    Returns:
        Number of ``<td>`` cells found in the grid's first-row pager table,
        or 1 when none are found.

    Raises:
        Exception: "ERROR : Can't get number of pages for selected district and
            type" when the lookup fails; the underlying error is attached as
            ``__cause__``.
    """
    try:
        pager_cells = driver.find_elements_by_xpath("//*[@id='ctl00_ContentPlaceHolder1_GridView2']/tbody/tr[1]/td/table/tbody/tr/td")
        # No pager cells is reported as one page.
        return max(1, len(pager_cells))
    except Exception as exc:
        raise Exception("ERROR : Can't get number of pages for selected district and type") from exc
        
# Select page
def select_page_pagination_section(driver,page_no,wait=None):
    """Click pager link ``page_no`` of the result grid and wait for the reload.

    Args:
        driver: WebDriver showing the result grid.
        page_no: 1-based index of the pager cell whose link is clicked.
        wait: Optional ``WebDriverWait`` for ``driver``. The function used to
            read a notebook-global ``wait``; when omitted a 30 s wait is now
            created from ``driver`` so no global is required.

    Raises:
        Exception: "ERROR : Switching page for pagination" when the link cannot
            be clicked or the page does not finish loading in time; the
            underlying error is attached as ``__cause__``.
    """
    try:
        driver.find_element_by_xpath("//*[@id='ctl00_ContentPlaceHolder1_GridView2']/tbody/tr[1]/td/table/tbody/tr/td[{}]/a".format(page_no)).click()
        if wait is None:
            wait = WebDriverWait(driver,30)
        # implicitly_wait() does not pause, so the old loop was an unbounded
        # busy loop; the explicit wait polls with a timeout instead.
        wait.until(lambda d: still_loading(d,wait) == "none")
        # Keep the sticky implicit-wait timeout the original left behind.
        driver.implicitly_wait(2)
        wait.until(EC.element_to_be_clickable((By.ID,"ctl00_ContentPlaceHolder1_GridView2")))
    except Exception as exc:
        raise Exception("ERROR : Switching page for pagination") from exc
        
def try_to_switch_to_first_page(driver,wait=None):
    """Best-effort jump to page 1 of the result grid.

    Any ordinary failure is reported with a printed message and otherwise
    ignored (the link for the current page may simply not be clickable).

    Args:
        driver: WebDriver showing the result grid.
        wait: Optional ``WebDriverWait`` for ``driver``. The function used to
            read a notebook-global ``wait``; when omitted a 30 s wait is now
            created from ``driver`` so no global is required.
    """
    try:
        driver.find_element_by_xpath("//*[@id='ctl00_ContentPlaceHolder1_GridView2']/tbody/tr[1]/td/table/tbody/tr/td[{}]/a".format(1)).click()
        if wait is None:
            wait = WebDriverWait(driver,30)
        # implicitly_wait() does not pause, so the old loop was an unbounded
        # busy loop; the explicit wait polls with a timeout instead.
        wait.until(lambda d: still_loading(d,wait) == "none")
        # Keep the sticky implicit-wait timeout the original left behind.
        driver.implicitly_wait(2)
        wait.until(EC.element_to_be_clickable((By.ID,"ctl00_ContentPlaceHolder1_GridView2")))
    except Exception:
        # Deliberately tolerant, but no longer a bare except: interrupting the
        # kernel must still stop the scrape.
        print("ERROR : Maybe first page is already selected !")
        
# Click on all the "View Detailed break up option"
def toggle_detailed_break_up_section(driver):
    """Click the detailed break-up link of the first hospital card on the page.

    Only the first element matching the XPath is clicked (``find_element``
    returns a single match).

    Args:
        driver: WebDriver showing the hospital cards.

    Raises:
        Exception: "ERROR : Can't open detailed break up section" when the link
            cannot be found or clicked; the underlying error is attached as
            ``__cause__``.
    """
    try:
        link = driver.find_element_by_xpath("//*[contains(@id,'_div_card')]/div[2]/div[2]/div[1]/a")
        link.click()
        # Not a pause: sets the driver's sticky implicit-wait timeout.
        driver.implicitly_wait(5)
    except Exception as exc:
        raise Exception("ERROR : Can't open detailed break up section") from exc

        
def MobileCleanData(data):
    """Turn a free-form phone field into a list of integers.

    The field is split on "/" and ",", and "+", "(", ")", "-" and spaces are
    removed from every fragment before it is converted with ``int``. Blank
    fragments are skipped.

    Args:
        data: Raw phone field. Non-string values are converted with ``str``
            first; ``None`` is treated as an empty field.

    Returns:
        Tuple ``(count, numbers)`` where ``numbers`` is the list of parsed
        integers and ``count`` its length. If any fragment is not numeric the
        whole field is rejected and ``(0, [0])`` is returned.
    """
    if data is None:
        return 0, []

    # Characters that may decorate a number but carry no digits.
    strip_table = str.maketrans("", "", "+()- ")

    result = []
    # str() lets numeric input through (it used to fail on .replace), and
    # splitting on both separators at once handles fields that mix "/" and ",".
    for item in re.split(r"[/,]", str(data)):
        tmp = item.translate(strip_table)

        # Skip blanks before converting so an empty fragment is not an error.
        if tmp.strip() == "":
            continue

        try:
            result.append(int(tmp))
        except ValueError:
            print(f"{item} failed")
            return 0,[0]

    # Tuple of (<number of mobile numbers>, [list of <mobile numbers>])
    return len(result),result


# True only while scrape_data is running.
scraping  = False

# Main Driver function for scraping
def scrape_data(driver):
    """Print the details of every hospital card on the current page.

    Sets the module-level ``scraping`` flag to True for the duration of the
    call and always resets it to False on exit, including when an error is
    raised.

    Args:
        driver: WebDriver showing the hospital cards.

    Returns:
        None. Prints "No items found" and returns early when the page has no
        cards.
    """
    # Without this declaration the assignments below created a local variable
    # and the module-level flag never changed.
    global scraping
    scraping = True
    try:
        # Get the list of hospital entries
        entries = driver.find_elements_by_xpath("//*[contains(@id,'_div_card')]")

        if len(entries) == 0 :
            print("No items found")
            return

        toggle_detailed_break_up_section(driver)

        # Number of hospitals under the selected category and district
        print(str(len(entries)) + " items fetched.")

        # Iterate through the entries and print the details
        for e in entries:
            print("Name : {}".format(e.find_element_by_xpath(".//h5").text))
            print("Address : {}".format(e.find_element_by_xpath("(.//div/div/div)[1]").text))
            phone_numbers = MobileCleanData(e.find_element_by_xpath("(.//div/div/div)[2]//a").text)[1]
            # An empty phone field parses to an empty list; fall back to 0, the
            # same placeholder MobileCleanData uses for unparsable fields.
            print("Phoneno : {}".format(phone_numbers[0] if phone_numbers else 0))
            print("Total Beds : {}".format(e.find_element_by_xpath(".//div[2]/div[1]/div[4]/div/ul/li[1]/h3").text))
            print("Available Beds : {}".format(e.find_element_by_xpath(".//div[2]/div[1]/div[4]/div/ul/li[2]/h3").text))
            print("Verified On : {}".format(e.find_element_by_xpath(".//div[3]/small").text))
            print("------------------------")
            bed_categories = e.find_elements_by_xpath(".//div[2]/div[2]/div[2]/div/div")
            print("Categories of beds : {}".format(len(bed_categories)))
            for bc in bed_categories:
                # The detailed break-up section must have been opened first,
                # otherwise these fields come back blank.
                try:
                    print("Category : {}".format(bc.find_element_by_xpath(".//div/div[1]").text))
                    print("Available : {}".format(bc.find_element_by_xpath(".//div/div[2]/div/div[4]/div/ul/li[2]/h3").text))
                    print("Total : {}".format(bc.find_element_by_xpath(".//div/div[2]/div/div[4]/div/ul/li[1]/h3").text))
                except Exception:
                    print("Due to error skipping the category section")
                print("----category_ end-------\n")

            print("----------END------------")
    finally:
        scraping = False

def still_loading(driver,wait):
    """Return the CSS ``display`` value of the page's progress element.

    This definition replaces the ``still_loading`` defined earlier in this
    cell; it returns "error" instead of "done" when the lookup fails.

    Args:
        driver: WebDriver used for the lookup.
        wait: Unused; kept so existing call sites keep working.

    Returns:
        The ``display`` property of element ``ctl00_ContentPlaceHolder1_upPgo``
        (callers treat "none" as "finished loading"), or "error" when the
        lookup fails.
    """
    try:
        progress = driver.find_element_by_xpath('//*[@id="ctl00_ContentPlaceHolder1_upPgo"]')
        return progress.value_of_css_property("display")
    except Exception:
        # Exception rather than a bare except: a kernel interrupt must not be
        # reported as a loading state.
        return "error"
    



In [24]:
# List of districts
# district_list = driver.find_element_by_id("ctl00_ContentPlaceHolder1_ddl_District")
# final_district_list = []
# for i in district_list.text.split("\n")[1:-1]:
#     final_district_list.append(i.strip())

In [25]:
# Options Radio Buttons
# RADIO_BUTTONS = driver.find_elements_by_xpath("//*[@id='ctl00_ContentPlaceHolder1_rdo_Govt_Flag']/label")
# for i,j in enumerate(RADIO_BUTTONS):
#     print(j.text,i)

## Test Code

### Steps one-by-one
* Call the initialization function > init_scrape
* First select the hospital type > select_hospital_type
* Then select the district > select_district_and_wait_until_load
* Get the number of pages > no_of_pages_for_selected_district_and_type
* Iterate through the pages
    * Open the detailed break-up section > toggle_detailed_break_up_section (scrape_data already calls this itself, so do not call it a second time when using scrape_data)
    * Call scrape_data to start scraping > scrape_data

In [26]:
# Start Chrome on the configured endpoint; `driver` and `wait` are the
# notebook-level handles used by the cells below.
driver,wait = init_scrape(chrome_options,endpoint)

In [None]:
# Walk every district / hospital-type combination and scrape each result page.
for district_lable,district_id in using_district_list.items():
    for hospital_type,type_info in using_hospital_type.items():
        select_hospital_type(driver,type_info["webkey"])
        select_district_and_wait_until_load(driver,wait,district_lable)
        time.sleep(5)
        no_of_pages = no_of_pages_for_selected_district_and_type(driver)
        print(f"\n\n{no_of_pages}\n\n")
        for pg_no in range(no_of_pages):
            # Page 1 is (re)selected on a best-effort basis; later pages are
            # opened through their pager link. The former `scraping` polling
            # loop and `global_buggy_pnum` check are gone: scrape_data runs
            # synchronously, so the flag was never set at this point and the
            # extra condition could never be true.
            if pg_no == 0:
                try_to_switch_to_first_page(driver)
            else:
                select_page_pagination_section(driver,pg_no+1)
            time.sleep(10)

            # Bounded wait for the progress element to be hidden. The old
            # `while ...: driver.implicitly_wait(10)` loop never paused
            # (implicitly_wait only sets a timeout) and could spin forever.
            wait.until(lambda d: still_loading(d,wait) == "none")
            scrape_data(driver)
        print(f"Exiting from District : {district_lable} Type : {hospital_type} Page No : {pg_no}")



2


ERROR : Maybe first page is already selected !
10 items fetched.
Name :  AMRI, Salt Lake - Vivekananda Yuba Bharati Krirangan Salt Lake Stadium (Satellite Govt. Building)
Address :   JB Block, Sector III, Bidhannagar, Kolkata, West Bengal 700098 , KOLKATA METROPOLITAN AREA
Phoneno : 9163645544
Total Beds : 80
Available Beds : 41
Verified On : Last Updated On : 13/05/2021 11:09 AM
------------------------
Categories of beds : 5
Category : Covid Beds (Regular)
Available : 0
Total : 0
----category_ end-------

Category : Covid Beds with Oxygen Support
Available : 41
Total : 80
----category_ end-------

Category : HDU Beds (Covid)
Available : 0
Total : 0
----category_ end-------

Category : CCU Beds (Covid - without ventilator)
Available : 0
Total : 0
----category_ end-------

Category : CCU Beds (Covid - with ventilator)
Available : 0
Total : 0
----category_ end-------

----------END------------
Name :  CHITTARANJAN NATIONAL CANCER INSTITUTE-CNCI (Government Hospital)
Address :   St

Category : Covid Beds (Regular)
Available : 16
Total : 59
----category_ end-------

Category : Covid Beds with Oxygen Support
Available : 0
Total : 72
----category_ end-------

Category : HDU Beds (Covid)
Available : 0
Total : 16
----category_ end-------

Category : CCU Beds (Covid - without ventilator)
Available : 0
Total : 13
----category_ end-------

Category : CCU Beds (Covid - with ventilator)
Available : 0
Total : 6
----category_ end-------

----------END------------
Exiting from District : KOLKATA METROPOLITAN AREA Type : Government Hospital Page No : 1


2


10 items fetched.
Name :  AMRI HOSPITAL, SALT LAKE Govt Requisitioned (Govt. Requisitioned Pvt. Hospital)
Address :   16 17, JC Block Lane, Central Park Road Broadway Road, stadium gate number 3, opposite salt lake, Sector III, Bidhannagar, Kolkata, West Bengal 700098 , KOLKATA METROPOLITAN AREA
Phoneno : 3366063800
Total Beds : 25
Available Beds : 1
Verified On : Last Updated On : 13/05/2021 11:03 AM
----------------------

Available : 0
Total : 3
----category_ end-------

----------END------------
Name :  Peerless Hospital Govt Requisitioned (Govt. Requisitioned Pvt. Hospital)
Address :   360, Pancha Sayar Rd, Sahid Smirity Colony, Pancha Sayar, Kolkata, West Bengal 700094 , KOLKATA METROPOLITAN AREA
Phoneno : 3340111222
Total Beds : 30
Available Beds : 0
Verified On : Last Updated On : 13/05/2021 12:44 PM
------------------------
Categories of beds : 5
Category : Covid Beds (Regular)
Available : 0
Total : 0
----category_ end-------

Category : Covid Beds with Oxygen Support
Available : 0
Total : 20
----category_ end-------

Category : HDU Beds (Covid)
Available : 0
Total : 0
----category_ end-------

Category : CCU Beds (Covid - without ventilator)
Available : 0
Total : 10
----category_ end-------

Category : CCU Beds (Covid - with ventilator)
Available : 0
Total : 0
----category_ end-------

----------END------------
1 items fetched.
Name :  Rabindranath Tagore International Institute of Cardiac Scienc

Total : 22
----category_ end-------

----------END------------
Name :  Apex Institute of Medical Sciences (Private Hospital (Self Run))
Address :   1219, PG Survey Park Road, Sammilani Park Rd, near Big Bazar, Hiland Park, Survey Park, Santoshpur, Kolkata, West Bengal 700075 , KOLKATA METROPOLITAN AREA
Phoneno : 3371256666
Total Beds : 24
Available Beds : 1
Verified On : Last Updated On : 13/05/2021 10:27 AM
------------------------
Categories of beds : 5
Category : Covid Beds (Regular)
Available : 1
Total : 6
----category_ end-------

Category : Covid Beds with Oxygen Support
Available : 0
Total : 14
----category_ end-------

Category : HDU Beds (Covid)
Available : 0
Total : 4
----category_ end-------

Category : CCU Beds (Covid - without ventilator)
Available : 0
Total : 0
----category_ end-------

Category : CCU Beds (Covid - with ventilator)
Available : 0
Total : 0
----category_ end-------

----------END------------
Name :  Apollo Gleneagles Hospital (Private Hospital (Self Run))
A

Address :   2, Rawdon St, Mullick Bazar, Park Street area, Kolkata, West Bengal 700017 , KOLKATA METROPOLITAN AREA
Phoneno : 3340405000
Total Beds : 0
Available Beds : 0
Verified On : Last Updated On : 10/05/2021 12:53 PM
------------------------
Categories of beds : 5
Category : Covid Beds (Regular)
Available : 0
Total : 0
----category_ end-------

Category : Covid Beds with Oxygen Support
Available : 0
Total : 0
----category_ end-------

Category : HDU Beds (Covid)
Available : 0
Total : 0
----category_ end-------

Category : CCU Beds (Covid - without ventilator)
Available : 0
Total : 0
----category_ end-------

Category : CCU Beds (Covid - with ventilator)
Available : 0
Total : 0
----category_ end-------

----------END------------
Name :  BMRC HOSPITALS (Private Hospital (Self Run))
Address :   6/6, Barrackpore Trunk Rd, Talpukur, Titagarh, Barrackpore, West Bengal 700120 , KOLKATA METROPOLITAN AREA
Phoneno : 18003455500
Total Beds : 33
Available Beds : 1
Verified On : Last Updated O

Available : 0
Total : 11
----category_ end-------

Category : HDU Beds (Covid)
Available : 0
Total : 0
----category_ end-------

Category : CCU Beds (Covid - without ventilator)
Available : 0
Total : 1
----category_ end-------

Category : CCU Beds (Covid - with ventilator)
Available : 0
Total : 1
----category_ end-------

----------END------------
Name :  Ekbalpur Nursing Home (Private Hospital (Self Run))
Address :   9, Ibrahim Rd, Ekbalpur, Khidirpur, Kolkata, West Bengal 700023 , KOLKATA METROPOLITAN AREA
Phoneno : 3371440909
Total Beds : 60
Available Beds : 0
Verified On : Last Updated On : 13/05/2021 03:20 PM
------------------------
Categories of beds : 5
Category : Covid Beds (Regular)
Available : 0
Total : 20
----category_ end-------

Category : Covid Beds with Oxygen Support
Available : 0
Total : 26
----category_ end-------

Category : HDU Beds (Covid)
Available : 0
Total : 5
----category_ end-------

Category : CCU Beds (Covid - without ventilator)
Available : 0
Total : 6
---

Total : 30
----category_ end-------

Category : HDU Beds (Covid)
Available : 0
Total : 0
----category_ end-------

Category : CCU Beds (Covid - without ventilator)
Available : 0
Total : 12
----category_ end-------

Category : CCU Beds (Covid - with ventilator)
Available : 0
Total : 3
----category_ end-------

----------END------------
Name :  ILS Hospitals, Dumdum (Private Hospital (Self Run))
Address :   1, Mall Road, near Nager Bazar Flyover, Dum Dum, Kolkata, West Bengal 700080 , KOLKATA METROPOLITAN AREA
Phoneno : 3340315041
Total Beds : 100
Available Beds : 0
Verified On : Last Updated On : 13/05/2021 09:08 AM
------------------------
Categories of beds : 5
Category : Covid Beds (Regular)
Available : 0
Total : 0
----category_ end-------

Category : Covid Beds with Oxygen Support
Available : 0
Total : 75
----category_ end-------

Category : HDU Beds (Covid)
Available : 0
Total : 0
----category_ end-------

Category : CCU Beds (Covid - without ventilator)
Available : 0
Total : 0
---

Category : CCU Beds (Covid - without ventilator)
Available : 0
Total : 0
----category_ end-------

Category : CCU Beds (Covid - with ventilator)
Available : 0
Total : 0
----category_ end-------

----------END------------
Name :  Medica Superspecialty Hospital (Private Hospital (Self Run))
Address :   127, Eastern Metropolitan Bypass, Nitai Nagar, Mukundapur, Kolkata, West Bengal 700099 , KOLKATA METROPOLITAN AREA
Phoneno : 7044488841
Total Beds : 264
Available Beds : 11
Verified On : Last Updated On : 13/05/2021 11:14 AM
------------------------
Categories of beds : 5
Category : Covid Beds (Regular)
Available : 0
Total : 0
----category_ end-------

Category : Covid Beds with Oxygen Support
Available : 11
Total : 122
----category_ end-------

Category : HDU Beds (Covid)
Available : 0
Total : 45
----category_ end-------

Category : CCU Beds (Covid - without ventilator)
Available : 0
Total : 57
----category_ end-------

Category : CCU Beds (Covid - with ventilator)
Available : 0
Total : 4

Address :   73 & 81B, Bagmari Road, Near Ultadanga Hudco Stop, Kolkata, West Bengal 700054 , KOLKATA METROPOLITAN AREA
Phoneno : 3366050999
Total Beds : 74
Available Beds : 0
Verified On : Last Updated On : 13/05/2021 12:09 PM
------------------------
Categories of beds : 5
Category : Covid Beds (Regular)
Available : 0
Total : 0
----category_ end-------

Category : Covid Beds with Oxygen Support
Available : 0
Total : 56
----category_ end-------

Category : HDU Beds (Covid)
Available : 0
Total : 5
----category_ end-------

Category : CCU Beds (Covid - without ventilator)
Available : 0
Total : 9
----category_ end-------

Category : CCU Beds (Covid - with ventilator)
Available : 0
Total : 4
----category_ end-------

----------END------------


In [None]:
# select_district_and_wait_until_load(driver,wait,"BANKURA")
# select_hospital_type(driver,3)
# no_of_pages = no_of_pages_for_selected_district_and_type(driver)
# print(no_of_pages)
# global_buggy_pnum = 0
# for pg_no in range(no_of_pages):
#     print(f"\nGoing on page {pg_no+1}\n")
#     if pg_no != 0 :
#         global_buggy_pnum = pg_no
#         select_page_pagination_section(driver,pg_no+1)
#         toggle_detailed_break_up_section(driver)

#     toggle_detailed_break_up_section(driver)
#     scrape_data(driver)
# print(f"Exiting from District : {district_lable} Type : {hospital_type} Page No : {pg_no}")

In [None]:
# select_district_and_wait_until_load(driver,wait,"BANKURA")
# select_hospital_type(driver,2)
# # select_page_pagination_section(driver,3)
# scrape_data(driver)

In [None]:
# Stage everything in the current directory, commit it with the message
# "update" and push to the `main` branch of `origin`.
# NOTE(review): `git add .` also stages any other changed or untracked files
# next to the notebook -- confirm that is intended before running.
!git add .
!git commit -m "update"
!git push origin main