<a href="https://colab.research.google.com/github/JoshuaThadi/Data-Science-Notes/blob/main/WebScrapingLab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Web Scraping Lab

## Beautiful Soap Assigment

In [None]:
! pip install html5lib



In [None]:
# !pip install request
! pip install bs4

Collecting bs4
  Downloading bs4-0.0.2-py2.py3-none-any.whl.metadata (411 bytes)
Downloading bs4-0.0.2-py2.py3-none-any.whl (1.2 kB)
Installing collected packages: bs4
Successfully installed bs4-0.0.2


In [None]:
from bs4 import BeautifulSoup # this module helps in web scraping
import requests # this module helps us to download a web page

In [None]:
%%html
<!DOCTYPE html>
<html>
<head>
<title>Page Title</title>
</head>
<body>
<h3><b id='boldest'>Lebron James</b></h3>
<p> Salary: $ 92,000,000 </p>
<h3> Stephen Curry</h3>
<p> Salary: $85,000, 000 </p>
<h3> Kevin Durant </h3>
<p> Salary: $73,200, 000</p>
</body>
</html>

#### We can store it as a string in the variable HTML


In [None]:
html = "<!DOCTYPE html><html><head><title>Page Title</title></head><body><h3> \
<b id='boldest'>Lebron James</b></h3><p> Salary: $ 92,000,000 </p> \
<h3>Stephen Curry</h3><p> Salary: $85,000,000</p> \
<h3>Kevin Durant</h3><p> Salary: $73,200,000</p></body></html>"

To parse a document, pass it into the <code>BeautifulSoup</code> constructor. The <code>BeautifulSoup</code> object represents the document as a nested data structure.

In [None]:
soup = BeautifulSoup(html, 'html5lib')

In [None]:
# to display html in a nested structure
print(soup.prettify())

<!DOCTYPE html>
<html>
 <head>
  <title>
   Page Title
  </title>
 </head>
 <body>
  <h3>
   <b id="boldest">
    Lebron James
   </b>
  </h3>
  <p>
   Salary: $ 92,000,000
  </p>
  <h3>
   Stephen Curry
  </h3>
  <p>
   Salary: $85,000,000
  </p>
  <h3>
   Kevin Durant
  </h3>
  <p>
   Salary: $73,200,000
  </p>
 </body>
</html>



# Tags

In [None]:
tag_object = soup.title
print("Tag object : ", tag_object)

Tag object :  <title>Page Title</title>


In [None]:
print("Tag object type : ", type(tag_object))

Tag object type :  <class 'bs4.element.Tag'>


In [None]:
tag_object = soup.h3
tag_object

<h3> <b id="boldest">Lebron James</b></h3>

### Children, Parents, and Siblings


In [None]:
tag_child = tag_object.b
tag_child

<b id="boldest">Lebron James</b>

In [None]:
parent_tag = tag_child.parent
parent_tag

<h3> <b id="boldest">Lebron James</b></h3>

In [None]:
tag_object

<h3> <b id="boldest">Lebron James</b></h3>

In [None]:
# tag_object parent is the body element.
print(tag_object.parent)

<body><h3> <b id="boldest">Lebron James</b></h3><p> Salary: $ 92,000,000 </p> <h3>Stephen Curry</h3><p> Salary: $85,000,000</p> <h3>Kevin Durant</h3><p> Salary: $73,200,000</p></body>


In [None]:
# sibling is the paragraph elements
sibling_1 = tag_object.next_sibling
print(sibling_1)

<p> Salary: $ 92,000,000 </p>


In [None]:
sibling_2 = sibling_1.next_sibling
print(sibling_2)

 


In [None]:
sibling_3 = sibling_2.next_sibling
print(sibling_3)
sibling_3 = tag_object.next_sibling
print(sibling_3)

<h3>Stephen Curry</h3>
<p> Salary: $ 92,000,000 </p>


## Html Attributes

In [None]:
tag_child['id']

'boldest'

In [None]:
tag_child.attrs

{'id': 'boldest'}

In [None]:
tag_child.get('id')

'boldest'

## Navigating String

In [None]:
tag_string = tag_child.string
tag_string

'Lebron James'

In [None]:
type(tag_string)

bs4.element.NavigableString

In [None]:
# A NavigableString is similar to a Python string or Unicode string.
unicode_string = str(tag_string)
unicode_string

'Lebron James'

## Filter

In [None]:
%%html
<table>
  <tr>
    <td id='flight' >Flight No</td>
    <td>Launch site</td>
    <td>Payload mass</td>
   </tr>
  <tr>
    <td>1</td>
    <td><a href='https://en.wikipedia.org/wiki/Florida'>Florida</a></td>
    <td>300 kg</td>
  </tr>
  <tr>
    <td>2</td>
    <td><a href='https://en.wikipedia.org/wiki/Texas'>Texas</a></td>
    <td>94 kg</td>
  </tr>
  <tr>
    <td>3</td>
    <td><a href='https://en.wikipedia.org/wiki/Florida'>Florida<a> </td>
    <td>80 kg</td>
  </tr>
</table>

0,1,2
Flight No,Launch site,Payload mass
1,Florida,300 kg
2,Texas,94 kg
3,Florida,80 kg


In [None]:
table = "<table><tr><td id='flight'>Flight No</td><td>Launch site</td> \
<td>Payload mass</td></tr><tr> <td>1</td> \
<td><a href='https://en.wikipedia.org/wiki/Florida'>Florida<a></td> \
<td>300 kg</td></tr><tr><td>2</td> \
<td><a href='https://en.wikipedia.org/wiki/Texas'>Texas</a></td> \
<td>94 kg</td></tr><tr><td>3</td> \
<td><a href='https://en.wikipedia.org/wiki/Florida'>Florida<a> </td> \
<td>80 kg</td></tr></table>"

In [None]:
table_bs = BeautifulSoup(table, 'html5lib')
print(table_bs)

<html><head></head><body><table><tbody><tr><td id="flight">Flight No</td><td>Launch site</td> <td>Payload mass</td></tr><tr> <td>1</td> <td><a href="https://en.wikipedia.org/wiki/Florida">Florida</a><a></a></td> <td>300 kg</td></tr><tr><td>2</td> <td><a href="https://en.wikipedia.org/wiki/Texas">Texas</a></td> <td>94 kg</td></tr><tr><td>3</td> <td><a href="https://en.wikipedia.org/wiki/Florida">Florida</a><a> </a></td> <td>80 kg</td></tr></tbody></table></body></html>


## find_All

In [None]:
table_rows = table_bs.find_all('tr')
table_rows

[<tr><td id="flight">Flight No</td><td>Launch site</td> <td>Payload mass</td></tr>,
 <tr> <td>1</td> <td><a href="https://en.wikipedia.org/wiki/Florida">Florida</a><a></a></td> <td>300 kg</td></tr>,
 <tr><td>2</td> <td><a href="https://en.wikipedia.org/wiki/Texas">Texas</a></td> <td>94 kg</td></tr>,
 <tr><td>3</td> <td><a href="https://en.wikipedia.org/wiki/Florida">Florida</a><a> </a></td> <td>80 kg</td></tr>]

In [None]:
first_rows = table_rows[0]
first_rows

<tr><td id="flight">Flight No</td><td>Launch site</td> <td>Payload mass</td></tr>

In [None]:
print(type(first_rows))

<class 'bs4.element.Tag'>


In [None]:
first_rows.td

<td id="flight">Flight No</td>

In [None]:
for i, row in enumerate(table_rows):
    print("row : ", i, "is ", row)

row :  0 is  <tr><td id="flight">Flight No</td><td>Launch site</td> <td>Payload mass</td></tr>
row :  1 is  <tr> <td>1</td> <td><a href="https://en.wikipedia.org/wiki/Florida">Florida</a><a></a></td> <td>300 kg</td></tr>
row :  2 is  <tr><td>2</td> <td><a href="https://en.wikipedia.org/wiki/Texas">Texas</a></td> <td>94 kg</td></tr>
row :  3 is  <tr><td>3</td> <td><a href="https://en.wikipedia.org/wiki/Florida">Florida</a><a> </a></td> <td>80 kg</td></tr>


In [None]:
for i, row in enumerate(table_rows):
    print("row ", i + 1)
    cells = row.find_all('td')
    for j, cell in enumerate(cells):
        print('column ', j + 1, "cell ", cell)

row  1
column  1 cell  <td id="flight">Flight No</td>
column  2 cell  <td>Launch site</td>
column  3 cell  <td>Payload mass</td>
row  2
column  1 cell  <td>1</td>
column  2 cell  <td><a href="https://en.wikipedia.org/wiki/Florida">Florida</a><a></a></td>
column  3 cell  <td>300 kg</td>
row  3
column  1 cell  <td>2</td>
column  2 cell  <td><a href="https://en.wikipedia.org/wiki/Texas">Texas</a></td>
column  3 cell  <td>94 kg</td>
row  4
column  1 cell  <td>3</td>
column  2 cell  <td><a href="https://en.wikipedia.org/wiki/Florida">Florida</a><a> </a></td>
column  3 cell  <td>80 kg</td>


In [None]:
list_input = table_bs.find_all(name = ["tr", "td"])
list_input

[<tr><td id="flight">Flight No</td><td>Launch site</td> <td>Payload mass</td></tr>,
 <td id="flight">Flight No</td>,
 <td>Launch site</td>,
 <td>Payload mass</td>,
 <tr> <td>1</td> <td><a href="https://en.wikipedia.org/wiki/Florida">Florida</a><a></a></td> <td>300 kg</td></tr>,
 <td>1</td>,
 <td><a href="https://en.wikipedia.org/wiki/Florida">Florida</a><a></a></td>,
 <td>300 kg</td>,
 <tr><td>2</td> <td><a href="https://en.wikipedia.org/wiki/Texas">Texas</a></td> <td>94 kg</td></tr>,
 <td>2</td>,
 <td><a href="https://en.wikipedia.org/wiki/Texas">Texas</a></td>,
 <td>94 kg</td>,
 <tr><td>3</td> <td><a href="https://en.wikipedia.org/wiki/Florida">Florida</a><a> </a></td> <td>80 kg</td></tr>,
 <td>3</td>,
 <td><a href="https://en.wikipedia.org/wiki/Florida">Florida</a><a> </a></td>,
 <td>80 kg</td>]

## Attributes

In [None]:
table_bs.find_all(id="flight")

[<td id="flight">Flight No</td>]

In [None]:
list_input = table_bs.find_all(href="https://en.wikipedia.org/wiki/Florida")
list_input

[<a href="https://en.wikipedia.org/wiki/Florida">Florida</a>,
 <a href="https://en.wikipedia.org/wiki/Florida">Florida</a>]

In [None]:
table_bs.find_all('a', href=True)

[<a href="https://en.wikipedia.org/wiki/Florida">Florida</a>,
 <a href="https://en.wikipedia.org/wiki/Texas">Texas</a>,
 <a href="https://en.wikipedia.org/wiki/Florida">Florida</a>]

In [None]:
table_bs.find_all('a', href=False)

[<a></a>, <a> </a>]

In [None]:
soup.find_all(id = 'boldest')

[<b id="boldest">Lebron James</b>]

In [None]:
for i in table_bs:
    print(table_bs.find_all(string="Florida"))

['Florida', 'Florida']


## Find

In [None]:
%%html
<h3>Rocket Launch </h3>

<p>
<table class='rocket'>
  <tr>
    <td>Flight No</td>
    <td>Launch site</td>
    <td>Payload mass</td>
  </tr>
  <tr>
    <td>1</td>
    <td>Florida</td>
    <td>300 kg</td>
  </tr>
  <tr>
    <td>2</td>
    <td>Texas</td>
    <td>94 kg</td>
  </tr>
  <tr>
    <td>3</td>
    <td>Florida </td>
    <td>80 kg</td>
  </tr>
</table>
</p>
<p>

<h3>Pizza Party</h3>


<table class='pizza'>
  <tr>
    <td>Pizza Place</td>
    <td>Orders</td>
    <td>Slices </td>
   </tr>
  <tr>
    <td>Domino's Pizza</td>
    <td>10</td>
    <td>100</td>
  </tr>
  <tr>
    <td>Little Caesars</td>
    <td>12</td>
    <td >144 </td>
  </tr>
  <tr>
    <td>Papa John's </td>
    <td>15 </td>
    <td>165</td>
  </tr>


0,1,2
Flight No,Launch site,Payload mass
1,Florida,300 kg
2,Texas,94 kg
3,Florida,80 kg

0,1,2
Pizza Place,Orders,Slices
Domino's Pizza,10,100
Little Caesars,12,144
Papa John's,15,165


In [None]:
two_tables ="<h3>Rocket Launch </h3> \
<p><table class='rocket'> \
<tr><td>Flight No</td><td>Launch site</td><td>Payload mass</td></tr> \
<tr><td>1</td><td>Florida</td><td>300 kg</td></tr> \
<tr><td>2</td><td>Texas</td><td>94 kg</td></tr> \
<tr><td>3</td><td>Florida </td><td>80 kg</td></tr></table></p>\
<p><h3>Pizza Party</h3> \
<table class='pizza'> \
<tr><td>Pizza Place</td><td>Orders</td><td>Slices </td></tr> \
<tr><td>Domino's Pizza</td><td>10</td><td>100</td></tr> \
<tr><td>Little Caesars</td><td>12</td><td >144 </td></tr> \
<tr><td>Papa John's</td><td>15 </td><td>165</td></tr>"

In [None]:
two_tables_bs = BeautifulSoup(two_tables, 'html.parser')
print(two_tables_bs)

<h3>Rocket Launch </h3> <p><table class="rocket"> <tr><td>Flight No</td><td>Launch site</td><td>Payload mass</td></tr> <tr><td>1</td><td>Florida</td><td>300 kg</td></tr> <tr><td>2</td><td>Texas</td><td>94 kg</td></tr> <tr><td>3</td><td>Florida </td><td>80 kg</td></tr></table></p><p><h3>Pizza Party</h3> <table class="pizza"> <tr><td>Pizza Place</td><td>Orders</td><td>Slices </td></tr> <tr><td>Domino's Pizza</td><td>10</td><td>100</td></tr> <tr><td>Little Caesars</td><td>12</td><td>144 </td></tr> <tr><td>Papa John's</td><td>15 </td><td>165</td></tr></table></p>


In [None]:
two_tables_bs.find('table')

<table class="rocket"> <tr><td>Flight No</td><td>Launch site</td><td>Payload mass</td></tr> <tr><td>1</td><td>Florida</td><td>300 kg</td></tr> <tr><td>2</td><td>Texas</td><td>94 kg</td></tr> <tr><td>3</td><td>Florida </td><td>80 kg</td></tr></table>

In [None]:
two_tables_bs.find("table", class_ = "pizza")

<table class="pizza"> <tr><td>Pizza Place</td><td>Orders</td><td>Slices </td></tr> <tr><td>Domino's Pizza</td><td>10</td><td>100</td></tr> <tr><td>Little Caesars</td><td>12</td><td>144 </td></tr> <tr><td>Papa John's</td><td>15 </td><td>165</td></tr></table>

In [None]:
# Extract the Rocket Launch table
rocket_table = soup.find("table", {"class": "rocket"})
print(rocket_table)
# Extract the Pizza Party table
pizza_table = soup.find("table", {"class": "pizza"})
print(pizza_table)

None
None


## Downloading and scraping the contents of a web page

In [None]:
url = "http://www.ibm.com"

In [None]:
data = requests.get(url).text
print(data)


<!DOCTYPE HTML>
<html lang="en">
<head>
    
    
    
    
    
    
    
      
    
    
    
    
    <meta charset="UTF-8"/>
    <meta name="languageCode" content="en"/>
    <meta name="countryCode" content="us"/>
    <meta name="searchTitle" content="IBM - United States"/>
    <meta name="focusArea" content="Cross IBM - All"/>
    <title>IBM - United States</title>
    <script defer="defer" type="text/javascript" src="https://rum.hlx.page/.rum/@adobe/helix-rum-js@%5E2/dist/rum-standalone.js" data-routing="program=131558,environment=1281329,tier=publish"></script>
<link rel="icon" href="/content/dam/adobe-cms/default-images/favicon.svg"/>
    
    <meta name="description" content="For more than a century, IBM has been a global technology innovator, leading advances in AI, automation and hybrid cloud solutions that help businesses grow."/>
    <meta name="template" content="full-width-layout"/>
    <meta name="viewport" content="width=device-width, initial-s

In [None]:
soup = BeautifulSoup(data, "html5lib")
print(soup)

<!DOCTYPE html>
<html lang="en"><head>
    
    
    
    
    
    
    
      
    
    
    
    
    <meta charset="utf-8"/>
    <meta content="en" name="languageCode"/>
    <meta content="us" name="countryCode"/>
    <meta content="IBM - United States" name="searchTitle"/>
    <meta content="Cross IBM - All" name="focusArea"/>
    <title>IBM - United States</title>
    <script data-routing="program=131558,environment=1281329,tier=publish" defer="defer" src="https://rum.hlx.page/.rum/@adobe/helix-rum-js@%5E2/dist/rum-standalone.js" type="text/javascript"></script>
<link href="/content/dam/adobe-cms/default-images/favicon.svg" rel="icon"/>
    
    <meta content="For more than a century, IBM has been a global technology innovator, leading advances in AI, automation and hybrid cloud solutions that help businesses grow." name="description"/>
    <meta content="full-width-layout" name="template"/>
    <meta content="width=device-width, initial-scale=1" name="viewport"/>
    <meta conte

In [None]:
# scape all link
for link in soup.find_all('a', href = True):
      print(link.get('href'))

https://www.ibm.com/granite?lnk=dev
https://developer.ibm.com/technologies/artificial-intelligence?lnk=dev
https://www.ibm.com/products/watsonx-code-assistant?lnk=dev
https://www.ibm.com/watsonx/developer/?lnk=dev
https://www.ibm.com/thought-leadership/institute-business-value/report/ceo-generative-ai?lnk=bus
https://www.ibm.com/think/reports/ai-in-action?lnk=bus
https://www.ibm.com/products/watsonx-orchestrate/ai-agent-for-hr?lnk=bus
https://skillsbuild.org/adult-learners/explore-learning/artificial-intelligence?lnk=bus
https://www.ibm.com/artificial-intelligence?lnk=ProdC
https://www.ibm.com/hybrid-cloud?lnk=ProdC
https://www.ibm.com/consulting?lnk=ProdC


In [None]:
for link in soup.find_all('img'):
      print(link)
print(link.get('src'))
print(link)

None
<a href="https://www.ibm.com/consulting?lnk=ProdC">IBM Consulting</a>


In [None]:
# The below url contains an html table with data about colors and color codes.
url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DA0321EN-SkillsNetwork/labs/datasets/HTMLColorCodes.html"

In [None]:
# get the contents of the webpage in text format and store in a variable called data
data = requests.get(url).text
print(data)

<html>
   <body>
      <h1>Partital List  of HTML5 Supported Colors</h1>
<table border ="1" class="main-table">
   <tr>
      <td>Number </td>
      <td>Color</td>
      <td>Color Name</td>
      <td>Hex Code<br>#RRGGBB</td>
      <td>Decimal Code<br>(R,G,B)</td>
   </tr>
   <tr>
      <td>1</td>
      <td style="background:lightsalmon;">&nbsp;</td>
      <td>lightsalmon</td>
      <td>#FFA07A</td>
      <td>rgb(255,160,122)</td>
   </tr>
   <tr>
      <td>2</td>
      <td style="background:salmon;">&nbsp;</td>
      <td>salmon</td>
      <td>#FA8072</td>
      <td>rgb(250,128,114)</td>
   </tr>
   <tr>
      <td>3</td>
      <td style="background:darksalmon;">&nbsp;</td>
      <td>darksalmon</td>
      <td>#E9967A</td>
      <td>rgb(233,150,122)</td>
   </tr>
   <tr>
      <td>4</td>
      <td style="background:lightcoral;">&nbsp;</td>
      <td>lightcoral</td>
      <td>#F08080</td>
      <td>rgb(240,128,128)</td>
   </tr>
   <tr>
      <td>5</td>
      <td style="background:coral;">

In [None]:
soup = BeautifulSoup(data, "html5lib")
print(soup)

<html><head></head><body>
      <h1>Partital List  of HTML5 Supported Colors</h1>
<table border="1" class="main-table">
   <tbody><tr>
      <td>Number </td>
      <td>Color</td>
      <td>Color Name</td>
      <td>Hex Code<br/>#RRGGBB</td>
      <td>Decimal Code<br/>(R,G,B)</td>
   </tr>
   <tr>
      <td>1</td>
      <td style="background:lightsalmon;"> </td>
      <td>lightsalmon</td>
      <td>#FFA07A</td>
      <td>rgb(255,160,122)</td>
   </tr>
   <tr>
      <td>2</td>
      <td style="background:salmon;"> </td>
      <td>salmon</td>
      <td>#FA8072</td>
      <td>rgb(250,128,114)</td>
   </tr>
   <tr>
      <td>3</td>
      <td style="background:darksalmon;"> </td>
      <td>darksalmon</td>
      <td>#E9967A</td>
      <td>rgb(233,150,122)</td>
   </tr>
   <tr>
      <td>4</td>
      <td style="background:lightcoral;"> </td>
      <td>lightcoral</td>
      <td>#F08080</td>
      <td>rgb(240,128,128)</td>
   </tr>
   <tr>
      <td>5</td>
      <td style="background:coral;"> </

In [None]:
table = soup.find('table'); print(table)

<table border="1" class="main-table">
   <tbody><tr>
      <td>Number </td>
      <td>Color</td>
      <td>Color Name</td>
      <td>Hex Code<br/>#RRGGBB</td>
      <td>Decimal Code<br/>(R,G,B)</td>
   </tr>
   <tr>
      <td>1</td>
      <td style="background:lightsalmon;"> </td>
      <td>lightsalmon</td>
      <td>#FFA07A</td>
      <td>rgb(255,160,122)</td>
   </tr>
   <tr>
      <td>2</td>
      <td style="background:salmon;"> </td>
      <td>salmon</td>
      <td>#FA8072</td>
      <td>rgb(250,128,114)</td>
   </tr>
   <tr>
      <td>3</td>
      <td style="background:darksalmon;"> </td>
      <td>darksalmon</td>
      <td>#E9967A</td>
      <td>rgb(233,150,122)</td>
   </tr>
   <tr>
      <td>4</td>
      <td style="background:lightcoral;"> </td>
      <td>lightcoral</td>
      <td>#F08080</td>
      <td>rgb(240,128,128)</td>
   </tr>
   <tr>
      <td>5</td>
      <td style="background:coral;"> </td>
      <td>coral</td>
      <td>#FF7F50</td>
      <td>rgb(255,127,80)</td>
   

In [None]:
for row in table.find_all('tr'):
      cols = row.find_all('td')
      color_name = cols[2].string
      color_code = cols[3].text
      print("{}--->{}".format(color_name, color_code))

Color Name--->Hex Code#RRGGBB
lightsalmon--->#FFA07A
salmon--->#FA8072
darksalmon--->#E9967A
lightcoral--->#F08080
coral--->#FF7F50
tomato--->#FF6347
orangered--->#FF4500
gold--->#FFD700
orange--->#FFA500
darkorange--->#FF8C00
lightyellow--->#FFFFE0
lemonchiffon--->#FFFACD
papayawhip--->#FFEFD5
moccasin--->#FFE4B5
peachpuff--->#FFDAB9
palegoldenrod--->#EEE8AA
khaki--->#F0E68C
darkkhaki--->#BDB76B
yellow--->#FFFF00
lawngreen--->#7CFC00
chartreuse--->#7FFF00
limegreen--->#32CD32
lime--->#00FF00
forestgreen--->#228B22
green--->#008000
powderblue--->#B0E0E6
lightblue--->#ADD8E6
lightskyblue--->#87CEFA
skyblue--->#87CEEB
deepskyblue--->#00BFFF
lightsteelblue--->#B0C4DE
dodgerblue--->#1E90FF


In [None]:
!pip install tabulate



In [None]:
from tabulate import tabulate

# Initialize a list to store rows
table_data = []

# Example loop to collect data
for row in table.find_all('tr'):
    cols = row.find_all('td')
    if len(cols) >= 4:  # Ensure there are enough columns
        color_name = cols[2].string.strip() if cols[2].string else ""
        color_code = cols[3].text.strip()
        table_data.append([color_name, color_code])  # Append data as a row

# Print the data in tabular format
headers = ["Color Name", "Color Code"]  # Define headers
print(tabulate(table_data, headers=headers, tablefmt="grid"))


+----------------+-----------------+
| Color Name     | Color Code      |
| Color Name     | Hex Code#RRGGBB |
+----------------+-----------------+
| lightsalmon    | #FFA07A         |
+----------------+-----------------+
| salmon         | #FA8072         |
+----------------+-----------------+
| darksalmon     | #E9967A         |
+----------------+-----------------+
| lightcoral     | #F08080         |
+----------------+-----------------+
| coral          | #FF7F50         |
+----------------+-----------------+
| tomato         | #FF6347         |
+----------------+-----------------+
| orangered      | #FF4500         |
+----------------+-----------------+
| gold           | #FFD700         |
+----------------+-----------------+
| orange         | #FFA500         |
+----------------+-----------------+
| darkorange     | #FF8C00         |
+----------------+-----------------+
| lightyellow    | #FFFFE0         |
+----------------+-----------------+
| lemonchiffon   | #FFFACD         |
+

## Scaping tables from a web page using pandas

In [None]:
# The below url contains an html table with data about colors and color codes.
url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DA0321EN-SkillsNetwork/labs/datasets/HTMLColorCodes.html"

In [None]:
import pandas as pd
tables = pd.read_html(url)
print(tables)

[         0      1               2                 3                     4
0   Number  Color      Color Name  Hex Code #RRGGBB  Decimal Code (R,G,B)
1        1    NaN     lightsalmon           #FFA07A      rgb(255,160,122)
2        2    NaN          salmon           #FA8072      rgb(250,128,114)
3        3    NaN      darksalmon           #E9967A      rgb(233,150,122)
4        4    NaN      lightcoral           #F08080      rgb(240,128,128)
5        5    NaN           coral           #FF7F50       rgb(255,127,80)
6        6    NaN          tomato           #FF6347        rgb(255,99,71)
7        7    NaN       orangered           #FF4500         rgb(255,69,0)
8        8    NaN            gold           #FFD700        rgb(255,215,0)
9        9    NaN          orange           #FFA500        rgb(255,165,0)
10      10    NaN      darkorange           #FF8C00        rgb(255,140,0)
11      11    NaN     lightyellow           #FFFFE0      rgb(255,255,224)
12      12    NaN    lemonchiffon    

In [None]:
tables[0]

Unnamed: 0,0,1,2,3,4
0,Number,Color,Color Name,Hex Code #RRGGBB,"Decimal Code (R,G,B)"
1,1,,lightsalmon,#FFA07A,"rgb(255,160,122)"
2,2,,salmon,#FA8072,"rgb(250,128,114)"
3,3,,darksalmon,#E9967A,"rgb(233,150,122)"
4,4,,lightcoral,#F08080,"rgb(240,128,128)"
5,5,,coral,#FF7F50,"rgb(255,127,80)"
6,6,,tomato,#FF6347,"rgb(255,99,71)"
7,7,,orangered,#FF4500,"rgb(255,69,0)"
8,8,,gold,#FFD700,"rgb(255,215,0)"
9,9,,orange,#FFA500,"rgb(255,165,0)"
