In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

### Retrieve an arbitrary Wikipedia page of "Python" and create a list of links on that page

In [2]:
# Page to scrape: the Wikipedia article "Python (mythology)".
url = 'https://en.wikipedia.org/wiki/Python_(mythology)'

In [3]:
# Download the page. Without a timeout, requests waits indefinitely on a
# stalled server, which would freeze the notebook on "Run All".
response = requests.get(url, timeout=30)
# Shown as the cell output so a non-200 answer is noticed before parsing.
response.status_code

200

In [4]:
# Parse the downloaded HTML. The parser is named explicitly so the result does
# not depend on which optional parsers happen to be installed, and so
# Beautiful Soup does not emit GuessedAtParserWarning.
soup = BeautifulSoup(response.content, 'html.parser')

In [5]:
# Preview every <a> tag that has an href attribute (True matches any value).
soup.find_all(name='a', attrs={'href': True})

[<a class="mw-jump-link" href="#mw-head">Jump to navigation</a>,
 <a class="mw-jump-link" href="#searchInput">Jump to search</a>,
 <a href="/wiki/Pythia" title="Pythia">Pythia</a>,
 <a class="mw-redirect mw-disambig" href="/wiki/Python_(disambiguation)" title="Python (disambiguation)">Python (disambiguation)</a>,
 <a class="image" href="/wiki/File:Apollo_dan_Pithon.jpg"><img alt="" class="thumbimage" data-file-height="436" data-file-width="584" decoding="async" height="261" src="//upload.wikimedia.org/wikipedia/commons/thumb/6/6d/Apollo_dan_Pithon.jpg/350px-Apollo_dan_Pithon.jpg" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/6/6d/Apollo_dan_Pithon.jpg/525px-Apollo_dan_Pithon.jpg 1.5x, //upload.wikimedia.org/wikipedia/commons/6/6d/Apollo_dan_Pithon.jpg 2x" width="350"/></a>,
 <a class="internal" href="/wiki/File:Apollo_dan_Pithon.jpg" title="Enlarge"></a>,
 <a href="/wiki/Apollo" title="Apollo">Apollo</a>,
 <a href="/wiki/Virgil_Solis" title="Virgil Solis">Virgil Solis</a>,
 <a

In [6]:
# Keep only the href value of each anchor that carries one.
links = [anchor['href'] for anchor in soup.find_all('a', href=True)]

links

['#mw-head',
 '#searchInput',
 '/wiki/Pythia',
 '/wiki/Python_(disambiguation)',
 '/wiki/File:Apollo_dan_Pithon.jpg',
 '/wiki/File:Apollo_dan_Pithon.jpg',
 '/wiki/Apollo',
 '/wiki/Virgil_Solis',
 '/wiki/Ovid',
 '/wiki/Metamorphoses',
 '/wiki/Greek_mythology',
 '/wiki/Greek_language',
 '/wiki/Genitive_case',
 '/wiki/Serpent_(symbolism)',
 '/wiki/Medieval',
 '/wiki/Dragon',
 '/wiki/Earth',
 '/wiki/Delphi',
 '#Mythology',
 '#Versions_and_interpretations',
 '#See_also',
 '#Notes',
 '#References',
 '/w/index.php?title=Python_(mythology)&action=edit&section=1',
 '/wiki/Pythia',
 '/wiki/Gaia_(mythology)',
 '#cite_note-1',
 '/wiki/Axis_mundi#Places',
 '/wiki/Omphalos',
 '/wiki/Chthonic',
 '/wiki/Apollo',
 '#cite_note-2',
 '#cite_note-3',
 '/w/index.php?title=Python_(mythology)&action=edit&section=2',
 '/wiki/File:Pietro_Francavilla_-_Apollo_Victorious_over_the_Python_-_Walters_27302.jpg',
 '/wiki/File:Pietro_Francavilla_-_Apollo_Victorious_over_the_Python_-_Walters_27302.jpg',
 '#cite_note-4',

### Find the number of titles that have changed in the United States Code since its last release point:

In [7]:
# Page to scrape: the download page of the United States Code site.
url = 'http://uscode.house.gov/download/download.shtml'

In [8]:
# Download the page. Without a timeout, requests waits indefinitely on a
# stalled server, which would freeze the notebook on "Run All".
response = requests.get(url, timeout=30)
# Shown as the cell output so a non-200 answer is noticed before parsing.
response.status_code

200

In [9]:
# Parse the downloaded HTML. The parser is named explicitly so the result does
# not depend on which optional parsers happen to be installed, and so
# Beautiful Soup does not emit GuessedAtParserWarning.
soup = BeautifulSoup(response.content, 'html.parser')

In [10]:
# The element id contains slashes, which must be backslash-escaped inside a CSS
# selector. A raw string keeps those backslashes without relying on Python's
# invalid-escape fallback ('\/' warns in a normal string literal).
# NOTE(review): this only shows the text of the element with id "us/usc/t16";
# it does not yet count the changed titles that the heading asks for.
soup.select(r'#us\/usc\/t16')[0].get_text()

'\n\n          Title 16 - Conservation\n\n        '

### Create a Python list with the names on the FBI's Ten Most Wanted list:

In [11]:
# Page to scrape: the FBI "topten" wanted listing.
url = 'https://www.fbi.gov/wanted/topten'

In [12]:
# Download the page. Without a timeout, requests waits indefinitely on a
# stalled server, which would freeze the notebook on "Run All".
response = requests.get(url, timeout=30)
# Shown as the cell output so a non-200 answer is noticed before parsing.
response.status_code

200

In [13]:
# Parse the downloaded HTML. The parser is named explicitly so the result does
# not depend on which optional parsers happen to be installed, and so
# Beautiful Soup does not emit GuessedAtParserWarning.
soup = BeautifulSoup(response.content, 'html.parser')

In [14]:
# Despite the name, this holds the <a> elements that are direct children of an
# <h3>, not the <h3> elements themselves.
h3 = soup.select('h3 > a')

In [15]:
# Reuse the anchors already selected into `h3` instead of re-running the CSS
# selector over the whole document once per name.
names = [anchor.get_text() for anchor in h3]

names

['JASON DEREK BROWN',
 'ALEXIS FLORES',
 'JOSE RODOLFO VILLARREAL-HERNANDEZ',
 'RAFAEL CARO-QUINTERO',
 'YULAN ADONAY ARCHAGA CARIAS',
 'EUGENE PALMER',
 'BHADRESHKUMAR CHETANBHAI PATEL',
 'ALEJANDRO ROSALES CASTILLO',
 'ARNOLDO JIMENEZ',
 'OCTAVIANO JUAREZ-CORRO']

### Display information on the 20 latest earthquakes (date, time, latitude, longitude and region name) reported by the EMSC as a pandas DataFrame

In [16]:
# Page to scrape: the EMSC earthquake listing.
url = 'https://www.emsc-csem.org/Earthquake/'

In [17]:
# Download the page. Without a timeout, requests waits indefinitely on a
# stalled server, which would freeze the notebook on "Run All".
response = requests.get(url, timeout=30)
# A bare `response.status_code` in the middle of a cell is never displayed, so
# fail loudly on an HTTP error instead of parsing an error page.
response.raise_for_status()
# The parser is named explicitly so the result does not depend on which
# optional parsers are installed (and to avoid GuessedAtParserWarning).
soup = BeautifulSoup(response.content, 'html.parser')

In [18]:
# Peek at cell 3 of the first table row (the raw date/time text).
first_row_cells = soup.select('#tbody > tr')[0].select('td')
first_row_cells[3].get_text()

'earthquake2022-05-02\xa0\xa0\xa017:31:44.018min ago'

In [19]:
# Peek at cell 4 of the first table row (used as the latitude value).
first_row_cells = soup.select('#tbody > tr')[0].select('td')
first_row_cells[4].get_text()

'21.15\xa0'

In [20]:
# Peek at cell 6 of the first table row (used as the longitude value).
first_row_cells = soup.select('#tbody > tr')[0].select('td')
first_row_cells[6].get_text()

'68.60\xa0'

In [21]:
# Peek at cell 11 of the first table row (the region name).
first_row_cells = soup.select('#tbody > tr')[0].select('td')
first_row_cells[11].get_text()

'\xa0ANTOFAGASTA, CHILE'

In [22]:
# Raw "<date><nbsp><nbsp><nbsp><time>..." strings, one per table row.
# (The name is kept because the next cell reads it.)
datetime = []

# Select the rows once and walk them directly, instead of re-running the
# selector over the whole document on every iteration.
for row in soup.select('#tbody > tr'):
    # Cell 3 holds the date/time text prefixed with the literal word "earthquake".
    stamp = row.select('td')[3].get_text()
    datetime.append(stamp.replace('earthquake', ''))

In [23]:
date = []
time = []

# Each entry looks like '<date>\xa0\xa0\xa0<time>.<fraction and age text>',
# so splitting on the non-breaking space puts the date at index 0 and the
# time portion at index 3.
for stamp in datetime:
    fields = stamp.split('\xa0')
    date.append(fields[0])
    # Keep only the part before the first '.', i.e. HH:MM:SS.
    time.append(fields[3].split('.')[0])

In [24]:
lat = []
lon = []
region = []

# Select the rows once and reuse each row's cells, instead of re-running the
# selector over the whole document several times per iteration.
for row in soup.select('#tbody > tr'):
    cells = row.select('td')
    # Numeric cells end with a non-breaking space; keep the part before it.
    # NOTE(review): these values carry no sign or N/S/E/W marker (e.g. 21.15 /
    # 68.60 for ANTOFAGASTA, CHILE) - presumably the hemisphere sits in the
    # neighbouring cells 5 and 7; confirm before treating these as coordinates.
    lat.append(cells[4].get_text().split('\xa0')[0])
    lon.append(cells[6].get_text().split('\xa0')[0])
    # The region cell starts with a non-breaking space; keep the part after it.
    region.append(cells[11].get_text().split('\xa0')[1])

# Sanity check: every column must have the same length before they are
# combined into a DataFrame.
print(len(lat))
print(len(lon))
print(len(region))
print(len(time))
print(len(date))


50
50
50
50
50


In [25]:
# Combine the parsed columns into one frame and keep only its first 20 rows.
earthquakes = pd.DataFrame(
    {
        "date": date,
        "time": time,
        "latitude": lat,
        "longitude": lon,
        "region": region,
    }
).head(20)
earthquakes

Unnamed: 0,date,time,latitude,longitude,region
0,2022-05-02,17:31:44,21.15,68.6,"ANTOFAGASTA, CHILE"
1,2022-05-02,17:00:30,31.6,131.9,"KYUSHU, JAPAN"
2,2022-05-02,16:47:43,43.08,18.14,BOSNIA AND HERZEGOVINA
3,2022-05-02,16:47:23,22.28,67.83,"POTOSI, BOLIVIA"
4,2022-05-02,16:33:01,31.33,68.66,"SAN JUAN, ARGENTINA"
5,2022-05-02,16:27:55,23.76,179.98,SOUTH OF FIJI ISLANDS
6,2022-05-02,16:16:17,38.43,14.52,"SICILY, ITALY"
7,2022-05-02,16:07:19,16.79,94.25,"OAXACA, MEXICO"
8,2022-05-02,15:56:49,44.65,7.06,NORTHERN ITALY
9,2022-05-02,15:53:57,1.31,120.48,"SULAWESI, INDONESIA"
