## Web Scraping

### Grabbing a Title

In [1]:
import requests
import bs4
from IPython.display import display, Image

In [2]:
# Internet connection is required!
result = requests.get("http://www.example.com")

In [3]:
type(result)

requests.models.Response

In [4]:
print(type(result.text))

<class 'str'>


In [5]:
result.text

'<!doctype html>\n<html>\n<head>\n    <title>Example Domain</title>\n\n    <meta charset="utf-8" />\n    <meta http-equiv="Content-type" content="text/html; charset=utf-8" />\n    <meta name="viewport" content="width=device-width, initial-scale=1" />\n    <style type="text/css">\n    body {\n        background-color: #f0f0f2;\n        margin: 0;\n        padding: 0;\n        font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;\n        \n    }\n    div {\n        width: 600px;\n        margin: 5em auto;\n        padding: 2em;\n        background-color: #fdfdff;\n        border-radius: 0.5em;\n        box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);\n    }\n    a:link, a:visited {\n        color: #38488f;\n        text-decoration: none;\n    }\n    @media (max-width: 700px) {\n        div {\n            margin: 0 auto;\n            width: auto;\n        }\n    }\n    </style>    \n</head>\n\n<body>\n<div>\n    <

In [6]:
print(result.text)

<!doctype html>
<html>
<head>
    <title>Example Domain</title>

    <meta charset="utf-8" />
    <meta http-equiv="Content-type" content="text/html; charset=utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1" />
    <style type="text/css">
    body {
        background-color: #f0f0f2;
        margin: 0;
        padding: 0;
        font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
        
    }
    div {
        width: 600px;
        margin: 5em auto;
        padding: 2em;
        background-color: #fdfdff;
        border-radius: 0.5em;
        box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);
    }
    a:link, a:visited {
        color: #38488f;
        text-decoration: none;
    }
    @media (max-width: 700px) {
        div {
            margin: 0 auto;
            width: auto;
        }
    }
    </style>    
</head>

<body>
<div>
    <h1>Example Domain</h1>
    <p>This domai

In [7]:
result.headers

{'Content-Encoding': 'gzip', 'Age': '567208', 'Cache-Control': 'max-age=604800', 'Content-Type': 'text/html; charset=UTF-8', 'Date': 'Sat, 02 Mar 2024 16:21:04 GMT', 'Etag': '"3147526947+gzip"', 'Expires': 'Sat, 09 Mar 2024 16:21:04 GMT', 'Last-Modified': 'Thu, 17 Oct 2019 07:18:26 GMT', 'Server': 'ECS (nyd/D164)', 'Vary': 'Accept-Encoding', 'X-Cache': 'HIT', 'Content-Length': '648'}

In [8]:
print(result.headers)

{'Content-Encoding': 'gzip', 'Age': '567208', 'Cache-Control': 'max-age=604800', 'Content-Type': 'text/html; charset=UTF-8', 'Date': 'Sat, 02 Mar 2024 16:21:04 GMT', 'Etag': '"3147526947+gzip"', 'Expires': 'Sat, 09 Mar 2024 16:21:04 GMT', 'Last-Modified': 'Thu, 17 Oct 2019 07:18:26 GMT', 'Server': 'ECS (nyd/D164)', 'Vary': 'Accept-Encoding', 'X-Cache': 'HIT', 'Content-Length': '648'}


In [9]:
result.url

'http://www.example.com/'

In [10]:
result.request

<PreparedRequest [GET]>

In [11]:
result.connection

<requests.adapters.HTTPAdapter at 0x2306b3b7d60>

In [12]:
result.json

<bound method Response.json of <Response [200]>>

In [13]:
result.history

[]

In [14]:
result.ok

True

In [15]:
result.cookies

<RequestsCookieJar[]>

In [16]:
print(result.cookies)

<RequestsCookieJar[]>


In [17]:
print(result.history)

[]


In [18]:
print(result.url)

http://www.example.com/


In [19]:
print(result.connection)

<requests.adapters.HTTPAdapter object at 0x000002306B3B7D60>


In [20]:
print(result.links)

{}


In [21]:
print(result.reason)

OK


In [22]:
print(result.request)

<PreparedRequest [GET]>


In [23]:
print(result.status_code)

200


In [24]:
result.reason

'OK'

In [25]:
result.raw

<urllib3.response.HTTPResponse at 0x2306b3b7580>

In [26]:
result.status_code

200

In [27]:
result.raise_for_status

<bound method Response.raise_for_status of <Response [200]>>

In [28]:
result.is_permanent_redirect

False

In [29]:
result.apparent_encoding

'ascii'

In [30]:
result.connection

<requests.adapters.HTTPAdapter at 0x2306b3b7d60>

In [31]:
result.elapsed

datetime.timedelta(microseconds=910710)

In [32]:
result.next

In [33]:
result.encoding

'UTF-8'

In [34]:
import lxml

In [35]:
soup = bs4.BeautifulSoup(result.text,'lxml')
soup

<!DOCTYPE html>
<html>
<head>
<title>Example Domain</title>
<meta charset="utf-8"/>
<meta content="text/html; charset=utf-8" http-equiv="Content-type"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<style type="text/css">
    body {
        background-color: #f0f0f2;
        margin: 0;
        padding: 0;
        font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
        
    }
    div {
        width: 600px;
        margin: 5em auto;
        padding: 2em;
        background-color: #fdfdff;
        border-radius: 0.5em;
        box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);
    }
    a:link, a:visited {
        color: #38488f;
        text-decoration: none;
    }
    @media (max-width: 700px) {
        div {
            margin: 0 auto;
            width: auto;
        }
    }
    </style>
</head>
<body>
<div>
<h1>Example Domain</h1>
<p>This domain is for use in illustrative examples

In [36]:
type(soup)

bs4.BeautifulSoup

In [37]:
help(bs4.BeautifulSoup)

Help on class BeautifulSoup in module bs4:

class BeautifulSoup(bs4.element.Tag)
 |  BeautifulSoup(markup='', features=None, builder=None, parse_only=None, from_encoding=None, exclude_encodings=None, element_classes=None, **kwargs)
 |  
 |  A data structure representing a parsed HTML or XML document.
 |  
 |  Most of the methods you'll call on a BeautifulSoup object are inherited from
 |  PageElement or Tag.
 |  
 |  Internally, this class defines the basic interface called by the
 |  tree builders when converting an HTML/XML document into a data
 |  structure. The interface abstracts away the differences between
 |  parsers. To write a new tree builder, you'll need to understand
 |  these methods as a whole.
 |  
 |  These methods will be called by the BeautifulSoup constructor:
 |    * reset()
 |    * feed(markup)
 |  
 |  The tree builder may call these methods from its feed() implementation:
 |    * handle_starttag(name, attrs) # See note about return value
 |    * handle_endtag(na

In [38]:
print(soup)

<!DOCTYPE html>
<html>
<head>
<title>Example Domain</title>
<meta charset="utf-8"/>
<meta content="text/html; charset=utf-8" http-equiv="Content-type"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<style type="text/css">
    body {
        background-color: #f0f0f2;
        margin: 0;
        padding: 0;
        font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
        
    }
    div {
        width: 600px;
        margin: 5em auto;
        padding: 2em;
        background-color: #fdfdff;
        border-radius: 0.5em;
        box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);
    }
    a:link, a:visited {
        color: #38488f;
        text-decoration: none;
    }
    @media (max-width: 700px) {
        div {
            margin: 0 auto;
            width: auto;
        }
    }
    </style>
</head>
<body>
<div>
<h1>Example Domain</h1>
<p>This domain is for use in illustrative examples

In [39]:
soup.select('title')

[<title>Example Domain</title>]

In [40]:
print(soup.select('title'))

[<title>Example Domain</title>]


In [41]:
soup.select('p')

[<p>This domain is for use in illustrative examples in documents. You may use this
     domain in literature without prior coordination or asking for permission.</p>,
 <p><a href="https://www.iana.org/domains/example">More information...</a></p>]

In [42]:
print(soup.select('p'))

[<p>This domain is for use in illustrative examples in documents. You may use this
    domain in literature without prior coordination or asking for permission.</p>, <p><a href="https://www.iana.org/domains/example">More information...</a></p>]


In [43]:
soup.select('h1')

[<h1>Example Domain</h1>]

In [44]:
soup.select('H1')

[<h1>Example Domain</h1>]

In [45]:
soup.select('title')

[<title>Example Domain</title>]

In [46]:
soup.select('title')[0]

<title>Example Domain</title>

In [47]:
soup.select('title')[0].getText()

'Example Domain'

In [48]:
soup.select('p')[0].getText()

'This domain is for use in illustrative examples in documents. You may use this\n    domain in literature without prior coordination or asking for permission.'

In [49]:
soup.select('p')[1].getText()

'More information...'

In [50]:
site_paragraphs = soup.select('p')

In [51]:
site_paragraphs[0]

<p>This domain is for use in illustrative examples in documents. You may use this
    domain in literature without prior coordination or asking for permission.</p>

In [52]:
print(site_paragraphs)

[<p>This domain is for use in illustrative examples in documents. You may use this
    domain in literature without prior coordination or asking for permission.</p>, <p><a href="https://www.iana.org/domains/example">More information...</a></p>]


In [53]:
type(site_paragraphs)

bs4.element.ResultSet

In [54]:
type(site_paragraphs[0])

bs4.element.Tag

In [55]:
type(site_paragraphs[1])

bs4.element.Tag

In [56]:
site_paragraphs[0]

<p>This domain is for use in illustrative examples in documents. You may use this
    domain in literature without prior coordination or asking for permission.</p>

In [57]:
site_paragraphs[0].getText()

'This domain is for use in illustrative examples in documents. You may use this\n    domain in literature without prior coordination or asking for permission.'

In [58]:
print(site_paragraphs[0].getText())

This domain is for use in illustrative examples in documents. You may use this
    domain in literature without prior coordination or asking for permission.


## Grabbing a Class

In [59]:
res = requests.get('https://en.wikipedia.org/wiki/Grace_Hopper')

In [60]:
type(res)

requests.models.Response

In [61]:
soup = bs4.BeautifulSoup(res.text, 'lxml')

In [62]:
# soup

In [63]:
len(soup.select(".vector-toc-contents"))

1

In [64]:
soup.select(".vector-toc-contents")

[<ul class="vector-toc-contents" id="mw-panel-toc-list">
 <li class="vector-toc-list-item vector-toc-level-1" id="toc-mw-content-text">
 <a class="vector-toc-link" href="#">
 <div class="vector-toc-text">(Top)</div>
 </a>
 </li>
 <li class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded" id="toc-Early_life_and_education">
 <a class="vector-toc-link" href="#Early_life_and_education">
 <div class="vector-toc-text">
 <span class="vector-toc-numb">1</span>Early life and education</div>
 </a>
 <ul class="vector-toc-list" id="toc-Early_life_and_education-sublist">
 </ul>
 </li>
 <li class="vector-toc-list-item vector-toc-level-1 vector-toc-list-item-expanded" id="toc-Career">
 <a class="vector-toc-link" href="#Career">
 <div class="vector-toc-text">
 <span class="vector-toc-numb">2</span>Career</div>
 </a>
 <button aria-controls="toc-Career-sublist" class="cdx-button cdx-button--weight-quiet cdx-button--icon-only vector-toc-toggle">
 <span class="vector-icon vector-ico

In [65]:
len(soup.select('.vector-toc-text'))

26

In [66]:
soup.select('.vector-toc-text')

[<div class="vector-toc-text">(Top)</div>,
 <div class="vector-toc-text">
 <span class="vector-toc-numb">1</span>Early life and education</div>,
 <div class="vector-toc-text">
 <span class="vector-toc-numb">2</span>Career</div>,
 <div class="vector-toc-text">
 <span class="vector-toc-numb">2.1</span>World War II</div>,
 <div class="vector-toc-text">
 <span class="vector-toc-numb">2.2</span>UNIVAC</div>,
 <div class="vector-toc-text">
 <span class="vector-toc-numb">2.3</span>COBOL</div>,
 <div class="vector-toc-text">
 <span class="vector-toc-numb">2.4</span>Standards</div>,
 <div class="vector-toc-text">
 <span class="vector-toc-numb">3</span>Retirement</div>,
 <div class="vector-toc-text">
 <span class="vector-toc-numb">4</span>Post-retirement</div>,
 <div class="vector-toc-text">
 <span class="vector-toc-numb">5</span>Anecdotes</div>,
 <div class="vector-toc-text">
 <span class="vector-toc-numb">6</span>Death</div>,
 <div class="vector-toc-text">
 <span class="vector-toc-numb">7</spa

In [67]:
soup.select(".vector-toc-numb")

[<span class="vector-toc-numb">1</span>,
 <span class="vector-toc-numb">2</span>,
 <span class="vector-toc-numb">2.1</span>,
 <span class="vector-toc-numb">2.2</span>,
 <span class="vector-toc-numb">2.3</span>,
 <span class="vector-toc-numb">2.4</span>,
 <span class="vector-toc-numb">3</span>,
 <span class="vector-toc-numb">4</span>,
 <span class="vector-toc-numb">5</span>,
 <span class="vector-toc-numb">6</span>,
 <span class="vector-toc-numb">7</span>,
 <span class="vector-toc-numb">8</span>,
 <span class="vector-toc-numb">8.1</span>,
 <span class="vector-toc-numb">8.2</span>,
 <span class="vector-toc-numb">9</span>,
 <span class="vector-toc-numb">9.1</span>,
 <span class="vector-toc-numb">9.2</span>,
 <span class="vector-toc-numb">9.3</span>,
 <span class="vector-toc-numb">9.3.1</span>,
 <span class="vector-toc-numb">10</span>,
 <span class="vector-toc-numb">11</span>,
 <span class="vector-toc-numb">12</span>,
 <span class="vector-toc-numb">13</span>,
 <span class="vector-toc-numb">

In [68]:
soup.select('.vector-toc-text')[0]

<div class="vector-toc-text">(Top)</div>

In [69]:
print(soup.select('.vector-toc-text')[1])

<div class="vector-toc-text">
<span class="vector-toc-numb">1</span>Early life and education</div>


In [70]:
soup.select('.vector-toc-text')[1].getText()

'\n1Early life and education'

In [71]:
print(soup.select('.vector-toc-text')[1].getText())


1Early life and education


In [72]:
first_item = soup.select('.vector-toc-text')[1]

In [73]:
first_item

<div class="vector-toc-text">
<span class="vector-toc-numb">1</span>Early life and education</div>

In [74]:
first_item.text

'\n1Early life and education'

In [75]:
soup.select('.vector-toc-text')[1].getText()

'\n1Early life and education'

In [76]:
soup.select('.vector-toc-text')[1].text

'\n1Early life and education'

In [77]:
print(first_item.text)


1Early life and education


In [78]:
for text in soup.select('.vector-toc-text'):
    print(text)    

<div class="vector-toc-text">(Top)</div>
<div class="vector-toc-text">
<span class="vector-toc-numb">1</span>Early life and education</div>
<div class="vector-toc-text">
<span class="vector-toc-numb">2</span>Career</div>
<div class="vector-toc-text">
<span class="vector-toc-numb">2.1</span>World War II</div>
<div class="vector-toc-text">
<span class="vector-toc-numb">2.2</span>UNIVAC</div>
<div class="vector-toc-text">
<span class="vector-toc-numb">2.3</span>COBOL</div>
<div class="vector-toc-text">
<span class="vector-toc-numb">2.4</span>Standards</div>
<div class="vector-toc-text">
<span class="vector-toc-numb">3</span>Retirement</div>
<div class="vector-toc-text">
<span class="vector-toc-numb">4</span>Post-retirement</div>
<div class="vector-toc-text">
<span class="vector-toc-numb">5</span>Anecdotes</div>
<div class="vector-toc-text">
<span class="vector-toc-numb">6</span>Death</div>
<div class="vector-toc-text">
<span class="vector-toc-numb">7</span>Dates of rank</div>
<div class="

In [79]:
for item in soup.select('.vector-toc-text')[1:]:
    print(item.text)    


1Early life and education

2Career

2.1World War II

2.2UNIVAC

2.3COBOL

2.4Standards

3Retirement

4Post-retirement

5Anecdotes

6Death

7Dates of rank

8Awards and honors

8.1Military awards

8.2Other awards

9Legacy

9.1Places

9.2Programs

9.3In popular culture

9.3.1Grace Hopper Celebration of Women in Computing

10See also

11Notes

12References

13Obituary notices

14Further reading

15External links


In [80]:
for text in soup.select('.vector-toc-text')[1:]:
    print(text.text)    


1Early life and education

2Career

2.1World War II

2.2UNIVAC

2.3COBOL

2.4Standards

3Retirement

4Post-retirement

5Anecdotes

6Death

7Dates of rank

8Awards and honors

8.1Military awards

8.2Other awards

9Legacy

9.1Places

9.2Programs

9.3In popular culture

9.3.1Grace Hopper Celebration of Women in Computing

10See also

11Notes

12References

13Obituary notices

14Further reading

15External links


## Grabbing an Image

In [81]:
res = requests.get('https://en.wikipedia.org/wiki/Deep_Blue_(chess_computer)')

In [82]:
soup = bs4.BeautifulSoup(res.text,'lxml')

In [83]:
# soup

In [84]:
# print(soup)

In [85]:
len(soup.select('img'))

17

In [86]:
soup.select('.img')

[]

In [87]:
print(soup.select('img'))

[<img alt="" aria-hidden="true" class="mw-logo-icon" height="50" src="/static/images/icons/wikipedia.png" width="50"/>, <img alt="Wikipedia" class="mw-logo-wordmark" src="/static/images/mobile/copyright/wikipedia-wordmark-en.svg" style="width: 7.5em; height: 1.125em;"/>, <img alt="The Free Encyclopedia" class="mw-logo-tagline" height="13" src="/static/images/mobile/copyright/wikipedia-tagline-en.svg" style="width: 7.3125em; height: 0.8125em;" width="117"/>, <img alt="This is a good article. Click here for more information." class="mw-file-element" data-file-height="185" data-file-width="180" decoding="async" height="20" src="//upload.wikimedia.org/wikipedia/en/thumb/9/94/Symbol_support_vote.svg/19px-Symbol_support_vote.svg.png" srcset="//upload.wikimedia.org/wikipedia/en/thumb/9/94/Symbol_support_vote.svg/29px-Symbol_support_vote.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/9/94/Symbol_support_vote.svg/39px-Symbol_support_vote.svg.png 2x" width="19"/>, <img class="mw-file-el

In [88]:
soup.select('img')[0]

<img alt="" aria-hidden="true" class="mw-logo-icon" height="50" src="/static/images/icons/wikipedia.png" width="50"/>

In [89]:
# src="//upload.wikimedia.org/wikipedia/en/thumb/9/94/Symbol_support_vote.svg/19px-Symbol_support_vote.svg.png"

In [90]:
soup.select('.infobox-image')

[<td class="infobox-image" colspan="2"><span class="mw-default-size" typeof="mw:File/Frameless"><a class="mw-file-description" href="/wiki/File:Deep_Blue.jpg"><img class="mw-file-element" data-file-height="601" data-file-width="400" decoding="async" height="331" src="//upload.wikimedia.org/wikipedia/commons/thumb/b/be/Deep_Blue.jpg/220px-Deep_Blue.jpg" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/b/be/Deep_Blue.jpg/330px-Deep_Blue.jpg 1.5x, //upload.wikimedia.org/wikipedia/commons/b/be/Deep_Blue.jpg 2x" width="220"/></a></span></td>]

In [91]:
len(soup.select(".mw-file-element"))

11

In [92]:
soup.select(".mw-file-element")

[<img alt="This is a good article. Click here for more information." class="mw-file-element" data-file-height="185" data-file-width="180" decoding="async" height="20" src="//upload.wikimedia.org/wikipedia/en/thumb/9/94/Symbol_support_vote.svg/19px-Symbol_support_vote.svg.png" srcset="//upload.wikimedia.org/wikipedia/en/thumb/9/94/Symbol_support_vote.svg/29px-Symbol_support_vote.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/9/94/Symbol_support_vote.svg/39px-Symbol_support_vote.svg.png 2x" width="19"/>,
 <img class="mw-file-element" data-file-height="601" data-file-width="400" decoding="async" height="331" src="//upload.wikimedia.org/wikipedia/commons/thumb/b/be/Deep_Blue.jpg/220px-Deep_Blue.jpg" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/b/be/Deep_Blue.jpg/330px-Deep_Blue.jpg 1.5x, //upload.wikimedia.org/wikipedia/commons/b/be/Deep_Blue.jpg 2x" width="220"/>,
 <img class="mw-file-element" data-file-height="64" data-file-width="64" decoding="async" height="150" src=

In [93]:
soup.select(".mw-file-element")[1]

<img class="mw-file-element" data-file-height="601" data-file-width="400" decoding="async" height="331" src="//upload.wikimedia.org/wikipedia/commons/thumb/b/be/Deep_Blue.jpg/220px-Deep_Blue.jpg" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/b/be/Deep_Blue.jpg/330px-Deep_Blue.jpg 1.5x, //upload.wikimedia.org/wikipedia/commons/b/be/Deep_Blue.jpg 2x" width="220"/>

In [94]:
computer = soup.select(".mw-file-element")[1]

In [95]:
computer

<img class="mw-file-element" data-file-height="601" data-file-width="400" decoding="async" height="331" src="//upload.wikimedia.org/wikipedia/commons/thumb/b/be/Deep_Blue.jpg/220px-Deep_Blue.jpg" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/b/be/Deep_Blue.jpg/330px-Deep_Blue.jpg 1.5x, //upload.wikimedia.org/wikipedia/commons/b/be/Deep_Blue.jpg 2x" width="220"/>

In [96]:
print(computer)

<img class="mw-file-element" data-file-height="601" data-file-width="400" decoding="async" height="331" src="//upload.wikimedia.org/wikipedia/commons/thumb/b/be/Deep_Blue.jpg/220px-Deep_Blue.jpg" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/b/be/Deep_Blue.jpg/330px-Deep_Blue.jpg 1.5x, //upload.wikimedia.org/wikipedia/commons/b/be/Deep_Blue.jpg 2x" width="220"/>


In [97]:
type(computer) # its Tag and not string
# so we can treat as a dict

bs4.element.Tag

In [98]:
computer['src']

'//upload.wikimedia.org/wikipedia/commons/thumb/b/be/Deep_Blue.jpg/220px-Deep_Blue.jpg'

In [99]:
print(computer['src'])

//upload.wikimedia.org/wikipedia/commons/thumb/b/be/Deep_Blue.jpg/220px-Deep_Blue.jpg


In [100]:
# paste below code in markdown
# <img src="//upload.wikimedia.org/wikipedia/commons/thumb/b/be/Deep_Blue.jpg/220px-Deep_Blue.jpg">

<img src="//upload.wikimedia.org/wikipedia/commons/thumb/b/be/Deep_Blue.jpg/220px-Deep_Blue.jpg">

In [101]:
computer = soup.select('.mw-file-element')[2]

In [102]:
computer['src']

'//upload.wikimedia.org/wikipedia/commons/thumb/5/52/Chess_Programming.svg/150px-Chess_Programming.svg.png'

<img src="//upload.wikimedia.org/wikipedia/commons/thumb/5/52/Chess_Programming.svg/150px-Chess_Programming.svg.png">

In [103]:
soup.select("img")

[<img alt="" aria-hidden="true" class="mw-logo-icon" height="50" src="/static/images/icons/wikipedia.png" width="50"/>,
 <img alt="Wikipedia" class="mw-logo-wordmark" src="/static/images/mobile/copyright/wikipedia-wordmark-en.svg" style="width: 7.5em; height: 1.125em;"/>,
 <img alt="The Free Encyclopedia" class="mw-logo-tagline" height="13" src="/static/images/mobile/copyright/wikipedia-tagline-en.svg" style="width: 7.3125em; height: 0.8125em;" width="117"/>,
 <img alt="This is a good article. Click here for more information." class="mw-file-element" data-file-height="185" data-file-width="180" decoding="async" height="20" src="//upload.wikimedia.org/wikipedia/en/thumb/9/94/Symbol_support_vote.svg/19px-Symbol_support_vote.svg.png" srcset="//upload.wikimedia.org/wikipedia/en/thumb/9/94/Symbol_support_vote.svg/29px-Symbol_support_vote.svg.png 1.5x, //upload.wikimedia.org/wikipedia/en/thumb/9/94/Symbol_support_vote.svg/39px-Symbol_support_vote.svg.png 2x" width="19"/>,
 <img class="mw-fil

In [104]:
len(soup.select('img'))

17

In [105]:
for image in (soup.select("img")):
    print(image['src'])

/static/images/icons/wikipedia.png
/static/images/mobile/copyright/wikipedia-wordmark-en.svg
/static/images/mobile/copyright/wikipedia-tagline-en.svg
//upload.wikimedia.org/wikipedia/en/thumb/9/94/Symbol_support_vote.svg/19px-Symbol_support_vote.svg.png
//upload.wikimedia.org/wikipedia/commons/thumb/b/be/Deep_Blue.jpg/220px-Deep_Blue.jpg
//upload.wikimedia.org/wikipedia/commons/thumb/5/52/Chess_Programming.svg/150px-Chess_Programming.svg.png
//upload.wikimedia.org/wikipedia/commons/thumb/6/6f/Kasparov_Magath_1985_Hamburg-2.png/220px-Kasparov_Magath_1985_Hamburg-2.png
//upload.wikimedia.org/wikipedia/commons/thumb/8/83/One_of_Deep_Blue%27s_processors_%282586060990%29.jpg/220px-One_of_Deep_Blue%27s_processors_%282586060990%29.jpg
//upload.wikimedia.org/wikipedia/commons/thumb/0/05/Chess.svg/28px-Chess.svg.png
//upload.wikimedia.org/wikipedia/commons/thumb/0/05/Chess.svg/28px-Chess.svg.png
//upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/16px-Symbol_category_class.

In [106]:
for i, image in enumerate(soup.select("img")):
    print(i+1, image['src'], '\n')

1 /static/images/icons/wikipedia.png 

2 /static/images/mobile/copyright/wikipedia-wordmark-en.svg 

3 /static/images/mobile/copyright/wikipedia-tagline-en.svg 

4 //upload.wikimedia.org/wikipedia/en/thumb/9/94/Symbol_support_vote.svg/19px-Symbol_support_vote.svg.png 

5 //upload.wikimedia.org/wikipedia/commons/thumb/b/be/Deep_Blue.jpg/220px-Deep_Blue.jpg 

6 //upload.wikimedia.org/wikipedia/commons/thumb/5/52/Chess_Programming.svg/150px-Chess_Programming.svg.png 

7 //upload.wikimedia.org/wikipedia/commons/thumb/6/6f/Kasparov_Magath_1985_Hamburg-2.png/220px-Kasparov_Magath_1985_Hamburg-2.png 

8 //upload.wikimedia.org/wikipedia/commons/thumb/8/83/One_of_Deep_Blue%27s_processors_%282586060990%29.jpg/220px-One_of_Deep_Blue%27s_processors_%282586060990%29.jpg 

9 //upload.wikimedia.org/wikipedia/commons/thumb/0/05/Chess.svg/28px-Chess.svg.png 

10 //upload.wikimedia.org/wikipedia/commons/thumb/0/05/Chess.svg/28px-Chess.svg.png 

11 //upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_ca

In [107]:
Image(url="//upload.wikimedia.org/wikipedia/commons/thumb/8/83/Symbol_template_class_pink.svg/16px-Symbol_template_class_pink.svg.png")

In [108]:
display(Image(url="//upload.wikimedia.org/wikipedia/commons/thumb/5/52/Chess_Programming.svg/150px-Chess_Programming.svg.png"))

In [109]:
for image in (soup.select("img")):
    display(Image(url=image['src']))

In [110]:
import requests
from IPython.display import display, Image
from bs4 import BeautifulSoup

image_urls = [
    "/static/images/icons/wikipedia.png",
    "/static/images/mobile/copyright/wikipedia-wordmark-en.svg",
    "/static/images/mobile/copyright/wikipedia-tagline-en.svg",
    "//upload.wikimedia.org/wikipedia/en/thumb/9/94/Symbol_support_vote.svg/19px-Symbol_support_vote.svg.png",
    "//upload.wikimedia.org/wikipedia/commons/thumb/b/be/Deep_Blue.jpg/220px-Deep_Blue.jpg",
    "//upload.wikimedia.org/wikipedia/commons/thumb/5/52/Chess_Programming.svg/150px-Chess_Programming.svg.png",
    "//upload.wikimedia.org/wikipedia/commons/thumb/6/6f/Kasparov_Magath_1985_Hamburg-2.png/220px-Kasparov_Magath_1985_Hamburg-2.png",
    "//upload.wikimedia.org/wikipedia/commons/thumb/8/83/One_of_Deep_Blue%27s_processors_%282586060990%29.jpg/220px-One_of_Deep_Blue%27s_processors_%282586060990%29.jpg",
    "//upload.wikimedia.org/wikipedia/commons/thumb/0/05/Chess.svg/28px-Chess.svg.png",
    "//upload.wikimedia.org/wikipedia/commons/thumb/0/05/Chess.svg/28px-Chess.svg.png",
    "//upload.wikimedia.org/wikipedia/en/thumb/9/96/Symbol_category_class.svg/16px-Symbol_category_class.svg.png",
    "//upload.wikimedia.org/wikipedia/en/thumb/4/4a/Commons-logo.svg/12px-Commons-logo.svg.png",
    "//upload.wikimedia.org/wikipedia/commons/thumb/8/83/Symbol_template_class_pink.svg/16px-Symbol_template_class_pink.svg.png",
    "//upload.wikimedia.org/wikipedia/en/thumb/8/8a/OOjs_UI_icon_edit-ltr-progressive.svg/10px-OOjs_UI_icon_edit-ltr-progressive.svg.png",
    "https://login.wikimedia.org/wiki/Special:CentralAutoLogin/start?type=1x1",
    "/static/images/footer/wikimedia-button.png",
    "/static/images/footer/poweredby_mediawiki_88x31.png"
]

# Display all the images
for url in image_urls:
    display(Image(url=url))


#### To view an online image in Python, you can use libraries such as requests to fetch the image from a URL and PIL (Python Imaging Library) or opencv to display the image. Here's a simple example using requests and PIL:

In [111]:
import requests
from PIL import Image
from io import BytesIO

def view_online_image(url):
    # Fetch the image from the URL
    response = requests.get(url)
    
    # Check if the request was successful
    if response.status_code == 200:
        # Open the image using PIL
        image = Image.open(BytesIO(response.content))
        
        # Display the image
        image.show()
    else:
        print("Failed to fetch the image. Status code:", response.status_code)

# Example usage
url = "https://upload.wikimedia.org/wikipedia/en/4/47/Iron_Man_%28circa_2018%29.png"
view_online_image(url)

In [112]:
image_link = requests.get('https://upload.wikimedia.org/wikipedia/commons/thumb/8/83/One_of_Deep_Blue%27s_processors_%282586060990%29.jpg/220px-One_of_Deep_Blue%27s_processors_%282586060990%29.jpg')

In [113]:
image_link

<Response [403]>

In [114]:
type(image_link)

requests.models.Response

In [115]:
len(('https://upload.wikimedia.org/wikipedia/commons/thumb/8/83/One_of_Deep_Blue%27s_processors_%282586060990%29.jpg/220px-One_of_Deep_Blue%27s_processors_%282586060990%29.jpg'))

169

In [116]:
# image_link.content

In [117]:
# save file
f = open('my_computer_image.jpg','wb') 

In [118]:
f

<_io.BufferedWriter name='my_computer_image.jpg'>

In [119]:
type(f)

_io.BufferedWriter

In [120]:
f.read

<function BufferedWriter.read>

In [121]:
# save file
f.write(image_link.content)

1932

In [122]:
f.close()

In [123]:
f = open('my_comp_image.jpg','wb')

In [124]:
f.write(image_link.content)

1932

In [125]:
f.read

<function BufferedWriter.read>

In [126]:
f.close()

In [127]:
f

<_io.BufferedWriter name='my_comp_image.jpg'>

In [128]:
print(f)

<_io.BufferedWriter name='my_comp_image.jpg'>


In [129]:
type(f)

_io.BufferedWriter

## Thank You