# Scraping Earthquake Data from January 2018 to June 2024 from the PHIVOLCS-DOST Website

# Importing necessary libraries
We use BeautifulSoup to scrape the PHIVOLCS website, since its pages are static HTML.

In [1]:
import requests
from bs4 import BeautifulSoup

import numpy as np
import pandas as pd
import re
import time

We want to obtain earthquake information spanning several months and years.
Unfortunately, the earthquake information is split by month AND year across different webpages. Below we initialize a list and use loops to modify the base URL so that we can access each link later.

In [2]:
# Every monthly bulletin lives at <base_url><year>/<year>_<MonthName>.html
base_url = "https://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/"
years = list(range(2018, 2024))
months = ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]

# All twelve months for each complete year, in chronological order.
links = [
    f"{base_url}{year}/{year}_{month}.html"
    for year in years
    for month in months
]

# 2024 is only covered for its first half (January through June).
links.extend(f"{base_url}2024/2024_{month}.html" for month in months[:6])

links

['https://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2018/2018_January.html',
 'https://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2018/2018_February.html',
 'https://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2018/2018_March.html',
 'https://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2018/2018_April.html',
 'https://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2018/2018_May.html',
 'https://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2018/2018_June.html',
 'https://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2018/2018_July.html',
 'https://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2018/2018_August.html',
 'https://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2018/2018_September.html',
 'https://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2018/2018_October.html',
 'https://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2018/2018_November.html',
 'https://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2018/2018_December.html',
 '

After that, we access the content of each webpage stored in the list above. The earthquake information is stored in tables, and we can use their HTML tags to retrieve them using Beautiful Soup.

In [3]:
def _clean_row(text):
    """Normalise the raw text of one table row into a single tidy line.

    Newlines and carriage returns are removed, tabs and non-breaking spaces
    become ordinary spaces, leading whitespace is stripped, and any run of
    two or more whitespace characters is collapsed to a single space.
    """
    text = text.replace("\n", "").replace("\r", "")
    text = text.replace("\t", " ").replace("\xa0", " ")
    text = text.lstrip()
    return re.sub(r'\s{2,}', ' ', text)


retries = 3            # attempts per page before giving up on it
retry_wait = 5         # seconds to wait between attempts
request_timeout = 30   # seconds; without a timeout a stalled connection blocks forever

# One entry per successfully scraped page; each entry is that page's list of cleaned rows.
all_values = []
for link in links:
    web_url = link
    for attempt in range(retries):
        try:
            # NOTE(review): verify=False turns off TLS certificate validation, so the
            # server's identity is not checked. Kept from the original; remove it if
            # the site's certificate validates in your environment.
            response = requests.get(web_url, verify=False, timeout=request_timeout)
            response.raise_for_status()  # Raise an exception for bad status codes
            break  # Exit the retry loop if successful
        except requests.exceptions.RequestException as e:
            print(f"Error fetching {web_url}: {e}")
            if attempt < retries - 1:
                time.sleep(retry_wait)  # No point waiting after the final attempt
    else:
        # The retry loop finished without a successful `break`.
        print(f"Failed to fetch {web_url} after {retries} retries.")
        continue  # Skip to the next link if all retries fail

    soup = BeautifulSoup(response.content, "html.parser")

    # The earthquake listing is the third table carrying the MsoNormalTable class.
    # Guard the index so one oddly structured page does not abort the whole scrape.
    tables = soup.find_all('table', class_="MsoNormalTable")
    if len(tables) < 3:
        print(f"Skipping {web_url}: expected at least 3 'MsoNormalTable' tables, found {len(tables)}.")
        continue
    table = tables[2]

    value = [_clean_row(row.text) for row in table.find_all('tr')]

    all_values.append(value)
    print("Done with " + web_url)



Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2018/2018_January.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2018/2018_February.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2018/2018_March.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2018/2018_April.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2018/2018_May.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2018/2018_June.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2018/2018_July.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2018/2018_August.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2018/2018_September.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2018/2018_October.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2018/2018_November.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2018/2018_December.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2019/2019_January.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2019/2019_February.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2019/2019_March.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2019/2019_April.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2019/2019_May.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2019/2019_June.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2019/2019_July.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2019/2019_August.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2019/2019_September.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2019/2019_October.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2019/2019_November.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2019/2019_December.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2020/2020_January.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2020/2020_February.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2020/2020_March.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2020/2020_April.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2020/2020_May.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2020/2020_June.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2020/2020_July.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2020/2020_August.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2020/2020_September.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2020/2020_October.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2020/2020_November.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2020/2020_December.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2021/2021_January.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2021/2021_February.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2021/2021_March.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2021/2021_April.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2021/2021_May.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2021/2021_June.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2021/2021_July.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2021/2021_August.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2021/2021_September.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2021/2021_October.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2021/2021_November.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2021/2021_December.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2022/2022_January.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2022/2022_February.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2022/2022_March.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2022/2022_April.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2022/2022_May.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2022/2022_June.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2022/2022_July.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2022/2022_August.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2022/2022_September.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2022/2022_October.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2022/2022_November.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2022/2022_December.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2023/2023_January.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2023/2023_February.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2023/2023_March.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2023/2023_April.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2023/2023_May.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2023/2023_June.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2023/2023_July.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2023/2023_August.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2023/2023_September.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2023/2023_October.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2023/2023_November.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2023/2023_December.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2024/2024_January.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2024/2024_February.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2024/2024_March.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2024/2024_April.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2024/2024_May.html




Done withhttps://earthquake.phivolcs.dost.gov.ph/EQLatest-Monthly/2024/2024_June.html


In [12]:
# Sanity check: how many pages were scraped (all_values holds one inner list per page).
print(len(all_values))
# Peek at the cleaned row strings of the first five pages.
all_values[0:5]

78


[['Date - Time (Philippine Time)Latitude (ºN)Longitude (ºE)Depth (km)MagLocation',
  'January',
  '31 January 2018 - 11:07 PM 13.20 125.48 025 2.8 082m N 29° E of Palapag (Northern Samar)',
  '31 January 2018 - 10:32 PM 11.68 124.26 007 3.7 011 km S 87° W of Kawayan (Biliran)',
  '31 January 2018 - 08:23 PM 13.05 120.56 034 3.5 018 km S 83° W of Santa Cruz (Occidental Mindoro)',
  '31 January 2018 - 06:42 PM 14.11 120.43 122 1.9 021 km N 82° W of Nasugbu (Batangas)',
  '31 January 2018 - 12:28 AM 18.71 120.87 022 3.3 017 km N 29° E of Pagudpud (Ilocos Norte)',
  '30 January 2018 - 11:54 PM 18.64 120.73 016 2.1 009 km N 43° W of Pagudpud (Ilocos Norte)',
  '30 January 2018 - 10:46 PM 09.89 125.36 005 3.3 015 km N 28° W of San Francisco (Anao-aon) (Surigao Del Norte)',
  '30 January 2018 - 10:11 PM 19.28 121.08 015 4.9 040 km S 89° W of Calayan (Cagayan)',
  '30 January 2018 - 09:40 PM 18.36 120.52 008 3.1 011 km N 76° W of Pasuquin (Ilocos Norte)',
  '30 January 2018 - 07:55 PM 17.05 12

The rows from each webpage are collected into their own list, which is then appended to a larger list. Below we flatten this into a single list so that we can store it in a DataFrame later.

In [5]:
# Flatten the list of per-page row lists into one flat list of row strings,
# keeping the page order and the row order within each page.
one_value_list = [row for page_rows in all_values for row in page_rows]
one_value_list


['Date - Time (Philippine Time)Latitude (ºN)Longitude (ºE)Depth (km)MagLocation',
 'January',
 '31 January 2018 - 11:07 PM 13.20 125.48 025 2.8 082m N 29° E of Palapag (Northern Samar)',
 '31 January 2018 - 10:32 PM 11.68 124.26 007 3.7 011 km S 87° W of Kawayan (Biliran)',
 '31 January 2018 - 08:23 PM 13.05 120.56 034 3.5 018 km S 83° W of Santa Cruz (Occidental Mindoro)',
 '31 January 2018 - 06:42 PM 14.11 120.43 122 1.9 021 km N 82° W of Nasugbu (Batangas)',
 '31 January 2018 - 12:28 AM 18.71 120.87 022 3.3 017 km N 29° E of Pagudpud (Ilocos Norte)',
 '30 January 2018 - 11:54 PM 18.64 120.73 016 2.1 009 km N 43° W of Pagudpud (Ilocos Norte)',
 '30 January 2018 - 10:46 PM 09.89 125.36 005 3.3 015 km N 28° W of San Francisco (Anao-aon) (Surigao Del Norte)',
 '30 January 2018 - 10:11 PM 19.28 121.08 015 4.9 040 km S 89° W of Calayan (Cagayan)',
 '30 January 2018 - 09:40 PM 18.36 120.52 008 3.1 011 km N 76° W of Pasuquin (Ilocos Norte)',
 '30 January 2018 - 07:55 PM 17.05 120.79 020 2.6

In [6]:
# Wrap the flat list of row strings in a single-column DataFrame
# (the column gets pandas' default integer label 0).
df = pd.DataFrame(one_value_list)
df

Unnamed: 0,0
0,Date - Time (Philippine Time)Latitude (ºN)Long...
1,January
2,31 January 2018 - 11:07 PM 13.20 125.48 025 2....
3,31 January 2018 - 10:32 PM 11.68 124.26 007 3....
4,31 January 2018 - 08:23 PM 13.05 120.56 034 3....
...,...
85916,01 June 2024 - 12:39 AM 14.98122.020112.0 016 ...
85917,01 June 2024 - 12:20 AM 16.07120.150103.0 004 ...
85918,01 June 2024 - 12:16 AM 10.71125.260131.9 023 ...
85919,01 June 2024 - 12:12 AM 10.14126.270101.5 025 ...


In [13]:
# Summarise the frame: index range, non-null count and dtype of its single column.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 85805 entries, 0 to 85920
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   0       85805 non-null  object
dtypes: object(1)
memory usage: 1.3+ MB


In [14]:
# Drop rows whose text exactly repeats an earlier row, keeping the first occurrence
# (for example the column-header line that appears at the top of each scraped page).
df = df.drop_duplicates()

In [15]:
# Display the frame again after de-duplication.
df

Unnamed: 0,0
0,Date - Time (Philippine Time)Latitude (ºN)Long...
1,January
2,31 January 2018 - 11:07 PM 13.20 125.48 025 2....
3,31 January 2018 - 10:32 PM 11.68 124.26 007 3....
4,31 January 2018 - 08:23 PM 13.05 120.56 034 3....
...,...
85916,01 June 2024 - 12:39 AM 14.98122.020112.0 016 ...
85917,01 June 2024 - 12:20 AM 16.07120.150103.0 004 ...
85918,01 June 2024 - 12:16 AM 10.71125.260131.9 023 ...
85919,01 June 2024 - 12:12 AM 10.14126.270101.5 025 ...


Finally, we export the data to a CSV file to be used in our Earthquake Dashboard.

In [9]:
# Write the CSV first so the file is saved to disk in every environment.
csv_filename = 'scraped eq.csv'
df.to_csv(csv_filename, index = False)

# google.colab is only importable inside Google Colab. Guard the import so the
# notebook still runs (and still saves the CSV) when executed anywhere else.
try:
    from google.colab import files
except ImportError:
    print(f"Saved '{csv_filename}' locally (not running in Colab, so no browser download).")
else:
    # In Colab, also trigger a browser download of the saved file.
    files.download(csv_filename)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Testing on a single page, so that I don't have to wait 5 minutes for the full scrape for nothing again.


In [10]:
# Single-page smoke test: scrape only the second link (February 2018) to check the
# parsing and cleaning steps without running the full multi-page scrape.
web_url = links[1]
# NOTE(review): verify=False turns off TLS certificate validation; remove it if the
# site's certificate validates in your environment.
# The timeout stops a stalled connection from hanging the cell indefinitely.
response = requests.get(web_url, verify=False, timeout=30)
# Fail with a clear HTTP error instead of trying to parse an error page.
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")
# The earthquake listing is the third table carrying the MsoNormalTable class.
table = soup.find_all('table', class_="MsoNormalTable")[2]

value = []
for row in table.find_all('tr'):
    # Remove line breaks, turn tabs / non-breaking spaces into plain spaces,
    # strip leading whitespace, then collapse repeated whitespace to one space.
    text = row.text.replace("\n", "").replace("\r", "")
    text = text.replace("\t", " ").replace("\xa0", " ")
    value.append(re.sub(r'\s{2,}', ' ', text.lstrip()))



In [11]:
# Show the cleaned rows produced by the single-page test above.
value

['Date - Time (Philippine Time)Latitude (ºN)Longitude (ºE)Depth (km)MagLocation',
 'FEBRUARY',
 '28 February 2018 - 11:57 PM 14.10 122.52 001 2.0 016 km N 06° W of Tagkawayan (Quezon)',
 '28 February 2018 - 11:43 PM 08.99 122.78 011 2.0 029 km S 74° W of Siaton (Negros Oriental)',
 '28 February 2018 - 11:05 PM 13.25 120.20 014 2.5 034 km S 58° W of Paluan (Occidental Mindoro)',
 '28 February 2018 - 10:22 PM 14.27 120.50 139 2.0 019 km S 06° E of Mariveles (Bataan)',
 '28 February 2018 - 09:11 PM 05.65 126.99 132 3.3 151 km S 42° E of Governor Generoso (Davao Oriental)',
 '28 February 2018 - 09:04 PM 07.47 124.23 032 2.1 006 km S 22° W of Matanog (Maguindanao)',
 '28 February 2018 - 07:44 PM 13.73 122.73 004 2.1 012 km S 32° W of Ragay (Camarines Sur)',
 '28 February 2018 - 02:49 PM 02.74 126.73 1914.0 329 km S 25° E of Sarangani (Davao Occidental)',
 '28 February 2018 - 12:30 PM 16.32 119.95 029 2.1 003 km N 06° W of Anda (Pangasinan)',
 '28 February 2018 - 11:12 AM 11.71 125.74 029 3.