In [7]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [8]:
# Requesting the HTML of the web page that lists the hospitals
url = 'https://sulit.ph/list-of-hospitals-in-metro-manila-with-contact-details-website-and-social-media-accounts/'

# A timeout keeps the cell from hanging indefinitely if the server never responds
page = requests.get(url, timeout=30)
# Fail here on an HTTP error status (404, 500, ...) instead of parsing an error page
page.raise_for_status()

# Name the parser explicitly: the bare 'html' feature lets BeautifulSoup choose
# whichever HTML parser is installed, so results could differ between machines
soup = BeautifulSoup(page.text, 'html.parser')

In [9]:
# Finding the first <table> element in the parsed HTML
table = soup.find('table')
# find() returns None when nothing matches; stop here with a clear message
# rather than failing with an AttributeError in a later cell
if table is None:
    raise ValueError('No <table> element found in the page HTML')

In [10]:
# Collecting every header cell (<th>) inside the table
labels = table.find_all(name='th')

In [11]:
# Extracting the text of each header cell; these become the column names
table_categories = [header_cell.get_text() for header_cell in labels]
table_categories

['CITY',
 'NAME OF HOSPITAL',
 'CONTACT NUMBER',
 'WEBSITE / EMAIL',
 'FACEBOOK LINK']

In [12]:
# Starting with an empty DataFrame whose columns are the scraped header names
hospitals = pd.DataFrame(data=None, columns=table_categories)
hospitals

Unnamed: 0,CITY,NAME OF HOSPITAL,CONTACT NUMBER,WEBSITE / EMAIL,FACEBOOK LINK


In [13]:
# Collecting every table row (<tr>); note that despite its name,
# column_data holds one entry per row, not per column
column_data = table.find_all(name='tr')

In [14]:
# Building the DataFrame from all data rows in a single step.
# Assigning the result (instead of appending to the existing frame row by row)
# means re-running this cell no longer duplicates rows, and it avoids the
# repeated resizing that growing a frame with .loc causes.
# NOTE(review): the first three <tr> rows are skipped — presumably title/header
# rows without usable <td> cells; confirm against the page.
row_values = [
    [cell.text for cell in row.find_all('td')]
    for row in column_data[3:]
]

# Keep the strictness of row-wise assignment: a row whose cell count does not
# match the headers is an error, not something to pad silently.
expected_width = len(table_categories)
for position, values in enumerate(row_values):
    if len(values) != expected_width:
        raise ValueError(
            f'Row {position} has {len(values)} cells, expected {expected_width}'
        )

hospitals = pd.DataFrame(row_values, columns=table_categories)

hospitals

Unnamed: 0,CITY,NAME OF HOSPITAL,CONTACT NUMBER,WEBSITE / EMAIL,FACEBOOK LINK
0,Caloocan,Caloocan City Medical Center,"South 5310 7925, North 8282 3397, 0943 216 6963",,https://www.facebook.com/Caloocan-City-Medical...
1,Caloocan,Dr. Jose N. Rodriguez Memorial Hospital and Sa...,"0966 549 2697, 8294 2571 to 73",http://djnrmh.doh.gov.ph/,https://www.facebook.com/officialDJNRMHS
2,Caloocan,MCU – FDT Medical Foundations Hospital,8367 2031,https://www.mcuhospital.org/,
3,Caloocan,Metro Balayan Medical Center,(043) 740 1350,http://www.metrobalayanmc.com.ph/,https://www.facebook.com/metrobalayan/
4,Las Pinas,Alabang Medical Center,"8807 8189, 8850 8719",,https://www.facebook.com/alabangmedicalcenter
...,...,...,...,...,...
91,Taguig,Medical Center of Taguig,8888 6284,,https://www.facebook.com/mctadminofficial/
92,Valenzuela,Allied Care Experts (ACE) Medical Center,"direct line to Admission 0917 844 3654, 8332 0...",,https://www.facebook.com/ACEMC-Valenzuela-Offi...
93,Valenzuela,Fatima University Medical Center,8291 6538,,
94,Valenzuela,Valenzuela Citicare Medical Center,"8860 9300, 8860 9300",http://www.citicare.com.ph/,https://www.facebook.com/vcmcthpgi/


In [15]:
# Writing the scraped table to disk as CSV, without the row index column
hospitals.to_csv(path_or_buf='/content/hospitals.csv', index=False)

In [16]:
# Loading the saved CSV back into a DataFrame and previewing the first five rows
hospitals = pd.read_csv(filepath_or_buffer='/content/hospitals.csv')
hospitals.head(n=5)

Unnamed: 0,CITY,NAME OF HOSPITAL,CONTACT NUMBER,WEBSITE / EMAIL,FACEBOOK LINK
0,Caloocan,Caloocan City Medical Center,"South 5310 7925, North 8282 3397, 0943 216 6963",,https://www.facebook.com/Caloocan-City-Medical...
1,Caloocan,Dr. Jose N. Rodriguez Memorial Hospital and Sa...,"0966 549 2697, 8294 2571 to 73",http://djnrmh.doh.gov.ph/,https://www.facebook.com/officialDJNRMHS
2,Caloocan,MCU – FDT Medical Foundations Hospital,8367 2031,https://www.mcuhospital.org/,
3,Caloocan,Metro Balayan Medical Center,(043) 740 1350,http://www.metrobalayanmc.com.ph/,https://www.facebook.com/metrobalayan/
4,Las Pinas,Alabang Medical Center,"8807 8189, 8850 8719",,https://www.facebook.com/alabangmedicalcenter


In [17]:
# Checking the datatype pandas inferred for each column when reading the CSV
hospitals.dtypes

CITY                object
NAME OF HOSPITAL    object
CONTACT NUMBER      object
WEBSITE / EMAIL     object
FACEBOOK LINK       object
dtype: object

In [18]:
# Removing the website/email and Facebook link columns from the DataFrame
unwanted_columns = ['WEBSITE / EMAIL', 'FACEBOOK LINK']
hospitals = hospitals.drop(columns=unwanted_columns)
hospitals

Unnamed: 0,CITY,NAME OF HOSPITAL,CONTACT NUMBER
0,Caloocan,Caloocan City Medical Center,"South 5310 7925, North 8282 3397, 0943 216 6963"
1,Caloocan,Dr. Jose N. Rodriguez Memorial Hospital and Sa...,"0966 549 2697, 8294 2571 to 73"
2,Caloocan,MCU – FDT Medical Foundations Hospital,8367 2031
3,Caloocan,Metro Balayan Medical Center,(043) 740 1350
4,Las Pinas,Alabang Medical Center,"8807 8189, 8850 8719"
...,...,...,...
91,Taguig,Medical Center of Taguig,8888 6284
92,Valenzuela,Allied Care Experts (ACE) Medical Center,"direct line to Admission 0917 844 3654, 8332 0..."
93,Valenzuela,Fatima University Medical Center,8291 6538
94,Valenzuela,Valenzuela Citicare Medical Center,"8860 9300, 8860 9300"


In [19]:
# Putting the hospital name first, followed by city and contact number
column_order = ['NAME OF HOSPITAL', 'CITY', 'CONTACT NUMBER']
hospitals = hospitals[column_order]
hospitals

Unnamed: 0,NAME OF HOSPITAL,CITY,CONTACT NUMBER
0,Caloocan City Medical Center,Caloocan,"South 5310 7925, North 8282 3397, 0943 216 6963"
1,Dr. Jose N. Rodriguez Memorial Hospital and Sa...,Caloocan,"0966 549 2697, 8294 2571 to 73"
2,MCU – FDT Medical Foundations Hospital,Caloocan,8367 2031
3,Metro Balayan Medical Center,Caloocan,(043) 740 1350
4,Alabang Medical Center,Las Pinas,"8807 8189, 8850 8719"
...,...,...,...
91,Medical Center of Taguig,Taguig,8888 6284
92,Allied Care Experts (ACE) Medical Center,Valenzuela,"direct line to Admission 0917 844 3654, 8332 0..."
93,Fatima University Medical Center,Valenzuela,8291 6538
94,Valenzuela Citicare Medical Center,Valenzuela,"8860 9300, 8860 9300"


In [20]:
# Counting missing values per column, then totalling them across the DataFrame
nulls_per_column = hospitals.isna().sum()
nulls_per_column.sum()

0