# Web Scraping with BeautifulSoup

In [6]:
!pip install BeautifulSoup4



In [4]:
from bs4 import BeautifulSoup as soup

In [3]:
from urllib.request import urlopen as uReq

In [5]:
my_url = 'https://www.newegg.com/Video-Cards-Video-Devices/Category/ID-38?Tpk=graphics%20card'

In [8]:
# opening the connection, grabbing the page
uClient = uReq(my_url)

In [9]:
# offloads the content into a variable
page_html = uClient.read()

In [10]:
# close the connection
uClient.close()

In [11]:
# need to parse the html using the soup function
page_soup = soup(page_html, "html.parser")

In [12]:
# gives the header of the page
page_soup.h1

<h1 class="page-title-text">Video Cards &amp; Video Devices</h1>

In [13]:
# grabs each product
containers = page_soup.findAll("div",{"class":"item-container"})

In [14]:
len(containers)

13

In [15]:
containers[0]

<div class="item-container ">
<!--product image-->
<a class="item-img" href="https://www.newegg.com/Product/Product.aspx?Item=N82E16814932055&amp;ignorebbr=1">
<img alt="GIGABYTE Radeon RX 580 DirectX 12 GV-RX580IXEB-8GD 8GB 256-Bit GDDR5 ATX Video Card" class=" lazy-img" data-effect="fadeIn" data-src="//images10.newegg.com/NeweggImage/ProductImageCompressAll300/14-932-055-Z01.jpg" src="//c1.neweggimages.com/WebResource/Themes/2005/Nest/blank.gif" title="GIGABYTE Radeon RX 580 DirectX 12 GV-RX580IXEB-8GD 8GB 256-Bit GDDR5 ATX Video Card">
</img></a>
<div class="item-info">
<!--brand info-->
<div class="item-branding">
<a class="item-brand" href="https://www.newegg.com/GIGABYTE/BrandStore/ID-1314">
<img alt="GIGABYTE" class=" lazy-img" data-effect="fadeIn" data-src="//images10.newegg.com/Brandimage_70x28//Brand1314.gif" src="//c1.neweggimages.com/WebResource/Themes/2005/Nest/blank.gif" title="GIGABYTE"/>
</a>
<!--rating info-->
</div>
<!--description info-->
<a class="item-title" href="

In [16]:
container = containers[0]

In [17]:
container.a

<a class="item-img" href="https://www.newegg.com/Product/Product.aspx?Item=N82E16814932055&amp;ignorebbr=1">
<img alt="GIGABYTE Radeon RX 580 DirectX 12 GV-RX580IXEB-8GD 8GB 256-Bit GDDR5 ATX Video Card" class=" lazy-img" data-effect="fadeIn" data-src="//images10.newegg.com/NeweggImage/ProductImageCompressAll300/14-932-055-Z01.jpg" src="//c1.neweggimages.com/WebResource/Themes/2005/Nest/blank.gif" title="GIGABYTE Radeon RX 580 DirectX 12 GV-RX580IXEB-8GD 8GB 256-Bit GDDR5 ATX Video Card">
</img></a>

In [19]:
container.div.div.a

<a class="item-brand" href="https://www.newegg.com/GIGABYTE/BrandStore/ID-1314">
<img alt="GIGABYTE" class=" lazy-img" data-effect="fadeIn" data-src="//images10.newegg.com/Brandimage_70x28//Brand1314.gif" src="//c1.neweggimages.com/WebResource/Themes/2005/Nest/blank.gif" title="GIGABYTE"/>
</a>

In [20]:
container.div.div.a.img

<img alt="GIGABYTE" class=" lazy-img" data-effect="fadeIn" data-src="//images10.newegg.com/Brandimage_70x28//Brand1314.gif" src="//c1.neweggimages.com/WebResource/Themes/2005/Nest/blank.gif" title="GIGABYTE"/>

In [21]:
container.div.div.a.img["title"]

'GIGABYTE'

In [24]:
title_container = container.findAll("a",{"class":"item-title"})

In [25]:
title_container

[<a class="item-title" href="https://www.newegg.com/Product/Product.aspx?Item=N82E16814932055&amp;ignorebbr=1" title="View Details"><i class="icon-premier icon-premier-xsm"></i>GIGABYTE Radeon RX 580 DirectX 12 GV-RX580IXEB-8GD Video Card</a>]

In [26]:
title_container[0].text

'GIGABYTE Radeon RX 580 DirectX 12 GV-RX580IXEB-8GD Video Card'

In [32]:
container.findAll("li",{"class":"price-ship"})

[<li class="price-ship">
         $4.99 Shipping
     </li>]

In [33]:
shipping_container = container.findAll("li",{"class":"price-ship"})

In [36]:
shipping_container[0].text.strip()

'$4.99 Shipping'

In [38]:
filename = "products.csv"
f = open(filename, "w")

In [39]:
headers = "make, product_name, shipping\n"
f.write(headers)

29

In [40]:
for container in containers:
    make = container.div.div.a.img["title"]
    
    title_container = container.findAll("a",{"class":"item-title"})
    product_name = title_container[0].text
    
    shipping_container = container.findAll("li",{"class":"price-ship"})
    shipping = shipping_container[0].text.strip()
    
    print("make: "+ make)
    print("product_name: "+ product_name)
    print("shipping: "+ shipping)
    
    f.write(make + "," + product_name.replace(",", "|") +"," +shipping + "\n")
    
f.close()

make: GIGABYTE
product_name: GIGABYTE Radeon RX 580 DirectX 12 GV-RX580IXEB-8GD Video Card
shipping: $4.99 Shipping
make: MSI
product_name: MSI Radeon RX 570 DirectX 12 RX 570 ARMOR MK2 8G OC Video Card
shipping: Free Shipping
make: ASUS
product_name: ASUS ROG GeForce GTX 1070 Ti STRIX-GTX1070TI-A8G-GAMING Video Card
shipping: $4.99 Shipping
make: Sapphire Tech
product_name: Sapphire Radeon NITRO+ RX 580 8GB GDDR5 PCI-E Dual HDMI / DVI-D / Dual DP w/ Backplate SPECIAL EDITION (UEFI), 100411NT+8GSEL
shipping: Free Shipping
make: EVGA
product_name: EVGA GeForce GTX 1080 FTW GAMING ACX 3.0, 08G-P4-6286-KR, 8GB GDDR5X, RGB LED, 10CM FAN, 10 Power Phases, Double BIOS, DX12 OSD Support (PXOC)
shipping: $4.99 Shipping
make: GIGABYTE
product_name: GIGABYTE GeForce GTX 1080 Ti DirectX 12 GV-N108TGAMINGOC BLACK-11GD Video Card
shipping: $4.99 Shipping
make: ZOTAC
product_name: ZOTAC GeForce GTX 1070 Mini, ZT-P10700G-10M, 8GB GDDR5
shipping: $4.99 Shipping
make: ASUS
product_name: ASUS ROG GeForc