<a href="https://colab.research.google.com/github/ApoorvGuptaAi/iitk-india-covid-data-parser/blob/main/Bihar_HospitalDataParser.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
from bs4 import BeautifulSoup
from datetime import datetime
from dataclasses import dataclass
from typing import List
import dateutil
import requests

In [2]:
@dataclass
class Resource:
  resource_type: str    # Resource type defined at https://github.com/abhinavj13/iitk-covid-help-api/blob/master/src/services/dataLead/db/enum.ts   
  resource_description: str
  resource_total: int
  resource_available: int

@dataclass
class Hospital:
  name: str
  address: str
  district: str
  city: str
  state: str
  location: str
  last_updated: datetime
  resources: List[Resource]
  

In [5]:
def get_bihar_soup():
  covid_bihar_home_url = 'https://covid19health.bihar.gov.in/DailyDashboard/BedsOccupied'
  bihar_home_response = requests.get(covid_bihar_home_url)
  return BeautifulSoup(bihar_home_response.text, 'html.parser')

bihar_soup = get_bihar_soup()

In [8]:
def get_updated_timestamp(updated_text):
  if updated_text:
    return dateutil.parser.parse(updated_text)

In [10]:
def parse_hospital_data(hospital_tds):
  parsed_data = {
      'district': hospital_tds[0].find('span', {'class': 'bed-district'}).text,
      'name': hospital_tds[1].find('span', {'class': 'bed-title'}).text,
      'map_href': hospital_tds[1].find('a')['href'],
      'category': hospital_tds[2].text,
      'last_updated': get_updated_timestamp(hospital_tds[3].text),
      'total_beds': hospital_tds[4].text,
      'vacant_beds': hospital_tds[5].text,
      'icu_beds': hospital_tds[6].text,
      'vacant_icu_beds': hospital_tds[7].text,
      'contact_phone': hospital_tds[8].text,
  }
  return parsed_data

def get_bihar_data(bihar_soup):
  master_table = bihar_soup.find('table', {'id': 'example'})
  hospital_rows = master_table.find_all('tr')
  all_hospitals = []
  for hospital_row in hospital_rows[1:]:
    hospital_tds = hospital_row.find_all('td')
    hospital_data = parse_hospital_data(hospital_tds)
    hospital = Hospital('', '', '', '', '', '', datetime.now, None)
    all_hospitals.append(hospital)
    hospital.name = hospital_data['name']
    hospital.district = hospital_data['district']
    hospital.state = 'BIHAR'
    hospital.location = hospital_data['map_href']
    hospital.last_updated = hospital_data['last_updated']
    hospital.resources = [
      Resource(
          resource_type='TOTAL_BEDS',
          resource_description='hospital beds',
          resource_total=hospital_data['total_beds'],
          resource_available=hospital_data['vacant_beds']),
      Resource(
          resource_type='ICU_WITHOUT_VENTILATOR',
          resource_description='icu beds',
          resource_total=hospital_data['icu_beds'],
          resource_available=hospital_data['vacant_icu_beds']),                
    ]
    print(hospital)
  return all_hospitals

all_hospitals = get_bihar_data(bihar_soup)

Hospital(name='DH Nawada', address='', district='NAWADA', city='', state='BIHAR', location='/DailyDashboard/Map?lat=24.88726&lon=85.54591&title=DH Nawada,&dis=NAWADA', last_updated=datetime.datetime(2021, 5, 2, 19, 4), resources=[Resource(resource_type='TOTAL_BEDS', resource_description='hospital beds', resource_total='60', resource_available='39'), Resource(resource_type='ICU_WITHOUT_VENTILATOR', resource_description='icu beds', resource_total='10', resource_available='10')])
Hospital(name='Sub Divisional Hospital Forbesganj, Araria', address='', district='ARARIA', city='', state='BIHAR', location='/DailyDashboard/Map?lat=26.30229&lon=87.25456&title=Sub Divisional Hospital Forbesganj, Araria,&dis=ARARIA', last_updated=datetime.datetime(2021, 4, 26, 8, 55), resources=[Resource(resource_type='TOTAL_BEDS', resource_description='hospital beds', resource_total='75', resource_available='75'), Resource(resource_type='ICU_WITHOUT_VENTILATOR', resource_description='icu beds', resource_total='0