In [1]:
import os
import re

import numpy as np
import pandas as pd
import requests
import sqlalchemy as db
from bs4 import BeautifulSoup
from sqlalchemy import text

In [31]:
# Database connection settings.
# Each value can be overridden through an environment variable so that real
# credentials never have to be written into the notebook; the fallbacks are the
# local-development values this notebook was originally run with.
username = os.environ.get("QUAKES_DB_USER", "root")
password = os.environ.get("QUAKES_DB_PASSWORD", "password")
server = os.environ.get("QUAKES_DB_HOST", "localhost")
database = os.environ.get("QUAKES_DB_NAME", "quakes")

# Engine shared by every cell below that talks to the database.
engine = db.create_engine(f"mysql+pymysql://{username}:{password}@{server}/{database}")

<h2>Populate Magnitude Types</h2>

In [13]:
# Download the USGS magnitude-types page and parse every HTML table on it.
url = "https://www.usgs.gov/programs/earthquake-hazards/magnitude-types"
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as if it were data.
response = requests.get(url, timeout=30)
response.raise_for_status()
dfs = pd.read_html(response.content)
dfs

[                                      Magnitude Type  Magnitude Range  \
 0          Mww (Moment W-phase)(generic notation Mw)  ~5.0 and larger   
 1                                     Mwc (centroid)  ~5.5 and larger   
 2                                    Mwb (body wave)     ~5.5 to ~7.0   
 3                                     Mwr (regional)     ~4.0 to ~6.5   
 4                    Ms20 or Ms (20sec surface wave)     ~5.0 to ~8.5   
 5                        mb (short-period body wave)     ~4.0 to ~6.5   
 6                          Mfa (felt-area magnitude)              any   
 7                               ML Ml, or ml (local)     ~2.0 to ~6.5   
 8   mb_Lg, mb_lg, or MLg (short-period surface wave)     ~3.5 to ~7.0   
 9                                Md or md (duration)    ~4 or smaller   
 10                     Mi or Mwp (integrated p-wave)     ~5.0 to ~8.0   
 11                                       Me (energy)  ~3.5 and larger   
 12                                   

In [26]:
# First column of the first parsed table.
# .copy() detaches the Series from dfs[0], so the in-place edits made in later
# cells cannot write through to (or warn about) the scraped DataFrame.
mag_types = dfs[0].iloc[:, 0].copy()
mag_types

0                                            Mww, Mw
1                                     Mwc (centroid)
2                                    Mwb (body wave)
3                                     Mwr (regional)
4                     Ms20 , Ms (20sec surface wave)
5                         mb (sh,t-period body wave)
6                          Mfa (felt-area magnitude)
7                                         ML, Ml, ml
8     mb_Lg, mb_lg, , MLg (sh,t-period surface wave)
9                                 Md , md (duration)
10                      Mi , Mwp (integrated p-wave)
11                                       Me (energy)
12                                                Mh
13                                               FFM
14                        Mint (intensity magnitude)
Name: Magnitude Type, dtype: object

In [30]:
# Normalise the scraped labels into whitespace-separated magnitude-type names.
# Work on a copy and rebind the name instead of mutating in place, so the object
# produced by the earlier cell is never modified behind the reader's back.
mag_types = mag_types.copy()
# particular cases: rows 0 and 13 are overwritten by hand
mag_types[0] = 'Mww Mw'
mag_types[13] = 'FFM'
# remove the standalone word 'or'; the \b anchors keep it from eating the 'or'
# inside other words (a bare 'or' pattern also rewrites e.g. "short")
mag_types = mag_types.replace(r'\bor\b', ' ', regex=True)
# remove content inside parentheses, and commas
mag_types = mag_types.replace(r'\(.*\)|,', ' ', regex=True)

In [41]:
#populate MagnitudeTypes
with engine.connect() as con:
    for index, item in enumerate(mag_types):
        for mag_type in set(map(lambda x: x.lower(), filter(None, item.split(' ')))):
            con.execute(text(f"INSERT INTO MagnitudeTypes(id_type, name) VALUE ({index}, '{mag_type}');"))

In [44]:
#checking
with engine.connect() as con:
    print("id|id_type|name")
    for row in con.execute(text("SELECT * FROM MagnitudeTypes")):
        print(row)

id|id_type|name
(13, 0, 'mw')
(14, 0, 'mww')
(15, 1, 'mwc')
(16, 2, 'mwb')
(17, 3, 'mwr')
(18, 4, 'ms20')
(19, 4, 'ms')
(20, 5, 'mb')
(21, 6, 'mfa')
(22, 7, 'ml')
(23, 8, 'mb_lg')
(24, 8, 'mlg')
(25, 9, 'md')
(26, 10, 'mwp')
(27, 10, 'mi')
(28, 11, 'me')
(29, 12, 'mh')
(30, 13, 'ffm')
(31, 14, 'mint')


<h2>Populate Agencies</h2>

In [2]:
# Download the ComCat catalog listing page and parse it with BeautifulSoup.
url = "https://earthquake.usgs.gov/data/comcat/catalog/"
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as if it were data.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')
print(soup.prettify())

<!DOCTYPE html>
<html lang="en">
 <head>
  <title>
   Catalogs
  </title>
  <meta charset="utf-8"/>
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <link href="/theme/site/earthquake/index.css" rel="stylesheet"/>
  <meta content="USGS Earthquake Hazards Program, responsible for monitoring, reporting, and researching earthquakes and earthquake hazards" name="description"/>
  <meta content="aftershock,earthquake,epicenter,fault,foreshock,geologist,geophysics,hazard,hypocenter,intensity,intensity scale,magnitude,magnitude scale,mercalli,plate,richter,seismic,seismicity,seismogram,seismograph,seismologist,seismology,subduction,tectonics,tsunami,quake,sismologico,sismologia" name="keywords"/>
  <script async="async" id="_fed_an_ua_tag" src="https://dap.digitalgov.gov/Universal-Federated-Analytics-Min.js?agency=DOI&amp;subagency=USGS">
  </script>
  <link href="https://fonts.googleapis.com/icon?family=Material+Icons|Merriweather:400,400italic,700|Source+Sans+Pro:400

In [11]:
# Collect every <div> whose class is "page-content" and show the first match.
divs = soup.find_all("div", class_="page-content")
divs[0]

<div class="page-content">

In [33]:
# Populate Agencies from the <li> items of the first "page-content" div.
# Each item is split at its first '-' into an abbreviation and a full name.
# Bound parameters (:abb, :full_name) let the driver do the quoting, so a name
# containing an apostrophe can no longer break the statement.
insert_agency = text("INSERT INTO Agencies(abbreviation, full_name) VALUES (:abb, :full_name)")
agency_rows = []
for row in divs[0].find_all("li"):
    abb, dash, full_name = row.text.partition('-')
    if not dash:
        # No '-' separator in this item, so there is no abbreviation/name pair
        # to store; skip it instead of failing on the unpack.
        continue
    agency_rows.append({"abb": abb.strip(), "full_name": full_name.strip()})
if agency_rows:
    # engine.begin() commits when the block succeeds and rolls back on error;
    # a plain engine.connect() would discard the inserts on SQLAlchemy 2.x.
    with engine.begin() as con:
        con.execute(insert_agency, agency_rows)