# Code to extract TV info from Good Guys

In [1]:
# Dependencies
import requests
from bs4 import BeautifulSoup as bs

There are 136 televisions listed on the Good Guys website contained in 3 webpages (60 per page) OR 2 webpages (100 per page)

In [2]:
# URL of pages to be scraped

urls = list() # Create list of urls for loop

urls.append("https://www.thegoodguys.com.au/televisions/all-tvs#facet:&productBeginIndex:0&orderBy:&pageView:grid&minPrice:&maxPrice:&pageSize:60&") 
urls.append("https://www.thegoodguys.com.au/televisions/all-tvs#facet:&productBeginIndex:60&orderBy:&pageView:grid&minPrice:&maxPrice:&pageSize:60&")
urls.append("https://www.thegoodguys.com.au/televisions/all-tvs#facet:&productBeginIndex:120&orderBy:&pageView:grid&minPrice:&maxPrice:&pageSize:60&")

urls

['https://www.thegoodguys.com.au/televisions/all-tvs#facet:&productBeginIndex:0&orderBy:&pageView:grid&minPrice:&maxPrice:&pageSize:60&',
 'https://www.thegoodguys.com.au/televisions/all-tvs#facet:&productBeginIndex:60&orderBy:&pageView:grid&minPrice:&maxPrice:&pageSize:60&',
 'https://www.thegoodguys.com.au/televisions/all-tvs#facet:&productBeginIndex:120&orderBy:&pageView:grid&minPrice:&maxPrice:&pageSize:60&']

In [3]:
# Create function to retrieve url page, create Beautiful Soup object, 
# parse and get results for loop.

def get_results(page_no): 

    # Retrieve page with the requests module
    response = requests.get(urls[page_no])

    # Create a Beautiful Soup object; parse with 'html.parser'
    soup = bs(response.text, "html.parser")

    # results are returned as an iterable list
    results = soup.select("#product_listing_tab")[0].find_all("li")
    
    return results
      
# Example: to find the Product Category for the first result
print(f"There are {len(get_results(1))} results on this page")

There are 60 results on this page


In [28]:
results = get_results(1)

## Find Product Category

In [5]:
# Create function to get product category for loop

def category_finder(x):
    onclick = results[x].a["onclick"]
    start = onclick.rfind("Product Category L1 :") + len("Product Category L1 :")
    end = onclick.rfind("','TGGCATLPEventAction':'Product Category L2 :")
    category = onclick[start:end]
    return category

# Example: to find the Product Category for the first result
print(category_finder(0))

Televisions


## Find Currency Code

In [6]:
# Create function to get currency code for loop

def currency_finder(x):
    scripts = results[x].script.find_next("script").string
    start = scripts.rfind("currencyCode")+len("currencyCode")+4
    end = start + 3
    currency = scripts[start:end]
    return currency

# Example: to find the Currency Code for the first result
print(currency_finder(0))

AUD


## Find Brand

In [7]:
# Create function to get brand for loop

def brand_finder(x):
    brand = results[x].select('input[type="hidden"]')[3]["value"]
    return brand

# Example: to find the Brand for the first result
print(brand_finder(0))

Sony


## Find Name

In [8]:
# Create function to get name for loop

def name_finder(x):
    brand = results[x].select('input[type="hidden"]')[3]["value"]
    name = results[x].select('input[type="hidden"]')[4]["value"]
    return brand + " " + name

# Example: to find the Product Name for the first result
print(name_finder(0))

Sony 32"(81cm) FHD LED LCD Smart TV


## Find Screen Size

In [9]:
# Create function to get screen size for loop

def size_finder(x):
    name = results[x].select('input[type="hidden"]')[4]["value"]
    size = name.split()[0]
    return size

# Example: to find the Screen Size for the first result
print(size_finder(0))

32"(81cm)


## Find Price

In [10]:
# Create function to get price for loop

def price_finder(x):
    price = results[x].select('input[type="hidden"]')[5]["value"]
    return price

# Example: to find the Price for the first result
print(price_finder(0))

$595.00


## Find Link to Product Image

In [11]:
# Create function to get image link for loop

def image_finder(x):
    image = results[0].img["data-src"]
    return image

# Example: to find the link to Product Image for the first result
print(image_finder(0))

//thegoodguys.sirv.com/products/50048259/50048259_511653.PNG?scale.height=215&scale.width=215&canvas.height=215&canvas.width=215&canvas.opacity=0


## The Loop

In [12]:
# Create lists to hold values

categories = list()
currencies = list()
brands = list()
names = list()
sizes = list()
prices = list()
images = list()

In [29]:
# Loop through first set of returned results

x = 0

for x in range(len(results)):
    
    try:
        
        category = category_finder(x)
        currency = currency_finder(x)
        brand = brand_finder(x)
        name = name_finder(x)
        size = size_finder(x)
        price = price_finder(x)
        image = image_finder(x)
        
        if (category and currency and brand and name and size and price and image):
            
                    categories.append(category)
                    currencies.append(currency)
                    brands.append(brand)
                    names.append(name)
                    sizes.append(size)
                    prices.append(price)
                    images.append(image)
                    
    except:
        print("Price not available: ",name_finder(x))
            
    x += 1

Price not available:  FFALCON 32" F1 HD LED TV
Price not available:  Sony 85" Z9G 8K UHD ANDROID LCD LED TV


In [30]:
# Check number of results - site lists 136 products in total

print(len(categories))
print(len(currencies))
print(len(brands))
print(len(names))
print(len(sizes))
print(len(prices))
print(len(images))

116
116
116
116
116
116
116


In [31]:
len(results)

60

In [32]:
print(names[0])
print(names[57])

Sony 32"(81cm) FHD LED LCD Smart TV
TCL 32" S615 HD ANDROID LED TV


In [33]:
print(names[58])
print(names[115])

Sony 32"(81cm) FHD LED LCD Smart TV
TCL 32" S615 HD ANDROID LED TV


In [35]:
import pandas as pd

df = pd.DataFrame({
        "category": "Televisions",
        "currency": currencies,
        "brand": brands,
        "name": names,
        "size": size,
        "price": prices,
        "image": images
})

df

Unnamed: 0,category,currency,brand,name,size,price,image
0,Televisions,AUD,Sony,"Sony 32""(81cm) FHD LED LCD Smart TV","32""",$595.00,//thegoodguys.sirv.com/products/50048259/50048...
1,Televisions,AUD,TCL,"TCL 40""(101cm) FHD LED LCD TV","32""",$445.00,//thegoodguys.sirv.com/products/50048259/50048...
2,Televisions,AUD,Hitachi,"Hitachi 32"" HD Smart LED TV","32""",$299.00,//thegoodguys.sirv.com/products/50048259/50048...
3,Televisions,AUD,Hitachi,"Hitachi 65"" UHD Smart LED TV","32""",$999.00,//thegoodguys.sirv.com/products/50048259/50048...
4,Televisions,AUD,Hitachi,"Hitachi 55""(140cm) UHD HDR LED LCD Smart TV","32""",$695.00,//thegoodguys.sirv.com/products/50048259/50048...
...,...,...,...,...,...,...,...
111,Televisions,AUD,Hisense,"Hisense 85"" Q8 4K UHD SMART ULED TV","32""",$4995.00,//thegoodguys.sirv.com/products/50048259/50048...
112,Televisions,AUD,TCL,"TCL 40"" S615 FHD ANDROID LED TV","32""",$495.00,//thegoodguys.sirv.com/products/50048259/50048...
113,Televisions,AUD,TCL,"TCL 65"" C815 4K PREMIUM UHD ANDROID QLED TV","32""",$1995.00,//thegoodguys.sirv.com/products/50048259/50048...
114,Televisions,AUD,TCL,"TCL 55"" C815 4K PREMIUM UHD ANDROID QLED TV","32""",$1495.00,//thegoodguys.sirv.com/products/50048259/50048...


In [27]:
df.describe()

Unnamed: 0,currency,brand,name,size,price,image
count,58,58,58,58,58,58
unique,1,9,58,1,43,1
top,AUD,Samsung,"Samsung Bezel Frame for 50"" Frame TVs Walnut","32""",$599.00,//thegoodguys.sirv.com/products/50048259/50048...
freq,58,15,1,58,3,58


In [40]:
df[df.duplicated()] # Page not refreshing when input new URL - need to fix

Unnamed: 0,category,currency,brand,name,size,price,image
58,Televisions,AUD,Sony,"Sony 32""(81cm) FHD LED LCD Smart TV","32""",$595.00,//thegoodguys.sirv.com/products/50048259/50048...
59,Televisions,AUD,TCL,"TCL 40""(101cm) FHD LED LCD TV","32""",$445.00,//thegoodguys.sirv.com/products/50048259/50048...
60,Televisions,AUD,Hitachi,"Hitachi 32"" HD Smart LED TV","32""",$299.00,//thegoodguys.sirv.com/products/50048259/50048...
61,Televisions,AUD,Hitachi,"Hitachi 65"" UHD Smart LED TV","32""",$999.00,//thegoodguys.sirv.com/products/50048259/50048...
62,Televisions,AUD,Hitachi,"Hitachi 55""(140cm) UHD HDR LED LCD Smart TV","32""",$695.00,//thegoodguys.sirv.com/products/50048259/50048...
63,Televisions,AUD,Hitachi,"Hitachi 50""(127cm) UHD LED LCD Smart TV","32""",$699.00,//thegoodguys.sirv.com/products/50048259/50048...
64,Televisions,AUD,Hitachi,"Hitachi 40""(101cm) FHD LED LCD Smart TV","32""",$399.00,//thegoodguys.sirv.com/products/50048259/50048...
65,Televisions,AUD,Philips,"Philips 55""(140cm) UHD LED LCD Smart TV","32""",$699.00,//thegoodguys.sirv.com/products/50048259/50048...
66,Televisions,AUD,Philips,"Philips 50""(126cm) UHD LED LCD Smart TV","32""",$599.00,//thegoodguys.sirv.com/products/50048259/50048...
67,Televisions,AUD,Linsar,"Linsar 58"" 4K UHD HDR Smart TV","32""",$599.00,//thegoodguys.sirv.com/products/50048259/50048...
