# Scraping the KMPDC website

In [None]:
# import resources

import requests
from bs4 import BeautifulSoup
import csv

In [None]:
# check if the site can be accessed

url = 'https://kmpdc.go.ke/Registers/H-Facilities.php'
# a timeout stops the request from waiting forever if the server never answers
response = requests.get(url, timeout=30)
response.status_code

In [None]:
# build a BeautifulSoup tree from the raw response bytes, then pretty-print it

soup = BeautifulSoup(response.content, features='html.parser')
formatted_html = soup.prettify()
print(formatted_html)

In [None]:
# find all table headers
headers = soup.find_all('th')

# keep only each header's text, collapse any surrounding/internal runs of
# whitespace to single spaces, and convert it to title case
columns = [" ".join(h.get_text().split()).title() for h in headers]

columns

In [None]:
# extra column titles: the geocoding query string plus the two coordinates
additional_columns = ['Geocoding_Query', 'Latitude', 'Longitude']

# grow the existing header list in place
columns += additional_columns

columns

In [None]:
table_body = soup.find_all('tbody')

data = []

# get all the text from each row, and keep each facility in its own list
for body in table_body:
    for tr in body.find_all('tr'):
        # strip surrounding whitespace and convert each cell to title case
        clean_cols = [td.get_text(strip=True).title() for td in tr.find_all('td')]

        # rows without any <td> cells carry no facility data, so leave them
        # out; every stored row is then a plain list of strings
        if clean_cols:
            data.append(clean_cols)

data

In [None]:
# create a string that will be used to search for each facility i.e. a geocoding query
for row in data:
    # the query reads columns 0, 2, 3 and 5, so skip rows that are too short
    # instead of stopping the whole loop with an IndexError
    if len(row) < 6:
        print(f"Skipping row with too few columns: {row}")
        continue

    facility_name = row[0].strip()
    address = row[2].strip()
    facility_type = row[3].strip()
    county = row[5].strip()

    # leave out blank fields so the query has no empty ", ," segments
    parts = [facility_name, address, facility_type, county]
    geocoding_query = ", ".join([part for part in parts if part] + ["Kenya"])

    # add the geocoding query to each facility's data
    row.append(geocoding_query)
    print(row)


In [None]:
# newline='' lets the csv module control line endings (otherwise blank lines
# can appear between rows on Windows); an explicit utf-8 encoding avoids
# depending on the platform's default encoding
with open('KMPDC_facilities.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(columns)    # write header row of csv
    writer.writerows(data)      # write data into csv
print("Data has been written into KMPDC_facilities.csv")