# Code to extract TV info from Good Guys

In [1]:
# Dependencies
import requests
from bs4 import BeautifulSoup as bs
from time import sleep
import pandas as pd
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
# from selenium.webdriver import ActionChains
from webdriver_manager.chrome import ChromeDriverManager

There are 136 televisions listed on the Good Guys website contained in 3 webpages (60 per page) so we will be using Splinter to help us automate through the pages

In [None]:
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

url = "https://www.thegoodguys.com.au/televisions/all-tvs"
browser.visit(url)

## Automate Browser Navigation

In [None]:
# ensuring the link is in view
browser.execute_script('window.scrollTo(0, 600);')

browser.links.find_by_text("Next").click()

In [None]:
browser.is_element_visible_by_css('h5', wait_time=10)

In [2]:
# URL of pages to be scraped

urls = list() # Create list of urls for loop

urls.append("https://www.thegoodguys.com.au/televisions/all-tvs#facet:&productBeginIndex:0&orderBy:&pageView:grid&minPrice:&maxPrice:&pageSize:60&") 
urls.append("https://www.thegoodguys.com.au/televisions/all-tvs#facet:&productBeginIndex:60&orderBy:&pageView:grid&minPrice:&maxPrice:&pageSize:60&")
urls.append("https://www.thegoodguys.com.au/televisions/all-tvs#facet:&productBeginIndex:120&orderBy:&pageView:grid&minPrice:&maxPrice:&pageSize:60&")

urls

['https://www.thegoodguys.com.au/televisions/all-tvs#facet:&productBeginIndex:0&orderBy:&pageView:grid&minPrice:&maxPrice:&pageSize:60&',
 'https://www.thegoodguys.com.au/televisions/all-tvs#facet:&productBeginIndex:60&orderBy:&pageView:grid&minPrice:&maxPrice:&pageSize:60&',
 'https://www.thegoodguys.com.au/televisions/all-tvs#facet:&productBeginIndex:120&orderBy:&pageView:grid&minPrice:&maxPrice:&pageSize:60&']

In [3]:
# Create function to retrieve url page, create Beautiful Soup object, 
# parse and get results for loop.

def get_results(page_no): 

    # Retrieve page with the requests module
    response = requests.get(urls[page_no])

    # Create a Beautiful Soup object; parse with 'html.parser'
    soup = bs(response.text, "html.parser")

    # results are returned as an iterable list
    results = soup.select("#product_listing_tab")[0].find_all("li")
    
    return results
      
# Example: to find the Product Category for the first result
print(f"There are {len(get_results(1))} results on this page")

There are 60 results on this page


In [23]:
results = get_results(1)

## Find Product Category

In [5]:
# Create function to get product category for loop

def category_finder(x):
    onclick = results[x].a["onclick"]
    start = onclick.rfind("Product Category L1 :") + len("Product Category L1 :")
    end = onclick.rfind("','TGGCATLPEventAction':'Product Category L2 :")
    category = onclick[start:end]
    return category

# Example: to find the Product Category for the first result
print(category_finder(0))

Televisions


## Find Currency Code

In [6]:
# Create function to get currency code for loop

def currency_finder(x):
    scripts = results[x].script.find_next("script").string
    start = scripts.rfind("currencyCode")+len("currencyCode")+4
    end = start + 3
    currency = scripts[start:end]
    return currency

# Example: to find the Currency Code for the first result
print(currency_finder(0))

AUD


## Find Brand

In [7]:
# Create function to get brand for loop

def brand_finder(x):
    brand = results[x].select('input[type="hidden"]')[3]["value"]
    return brand

# Example: to find the Brand for the first result
print(brand_finder(0))

Sony


## Find Model Number

In [8]:
# Create function to get model number for loop

def model_finder(x):
    model = results[x].find("div", class_="product-tile-model").text
    return model

# Example: to find the Model Number for the first result
print(model_finder(0))

KDL32W660E


## Find Name

In [9]:
# Create function to get name for loop

def name_finder(x):
    brand = results[x].select('input[type="hidden"]')[3]["value"]
    name = results[x].select('input[type="hidden"]')[4]["value"]
    return brand + " " + name

# Example: to find the Product Name for the first result
print(name_finder(0))

Sony 32"(81cm) FHD LED LCD Smart TV


## Find Screen Size

In [10]:
# Create function to get screen size for loop

def size_finder(x):
    name = results[x].select('input[type="hidden"]')[4]["value"]
    size = name.split()[0]
    return size

# Example: to find the Screen Size for the first result
print(size_finder(0))

32"(81cm)


## Find Price

In [11]:
# Create function to get price for loop

def price_finder(x):
    price = results[x].select('input[type="hidden"]')[5]["value"]
    return price

# Example: to find the Price for the first result
print(price_finder(0))

$595.00


## Find Link to Product Image

In [12]:
# Create function to get image link for loop

def image_finder(x):
    image = results[x].img["data-src"]
    return image

# Example: to find the link to Product Image for the first result
print(image_finder(0))

//thegoodguys.sirv.com/products/50048259/50048259_511653.PNG?scale.height=215&scale.width=215&canvas.height=215&canvas.width=215&canvas.opacity=0


## The Loop

In [13]:
# Create lists to hold values

categories = list()
currencies = list()
brands = list()
models = list()
names = list()
sizes = list()
prices = list()
images = list()

In [24]:
# Loop through first set of returned results

x = 0

for x in range(len(results)):
    
    try:
        
        category = category_finder(x)
        currency = currency_finder(x)
        brand = brand_finder(x)
        model = model_finder(x)
        name = name_finder(x)
        size = size_finder(x)
        price = price_finder(x)
        image = image_finder(x)
        
        if (category and currency and brand and model and name and size and price and image):
            
                    categories.append(category)
                    currencies.append(currency)
                    brands.append(brand)
                    models.append(model)
                    names.append(name)
                    sizes.append(size)
                    prices.append(price)
                    images.append(image)
                    
    except:
        print("Price not available: ",name_finder(x))
            
    x += 1

Price not available:  FFALCON 32" F1 HD LED TV
Price not available:  Sony 85" Z9G 8K UHD ANDROID LCD LED TV


In [25]:
# Check number of results - site lists 136 products in total

print(len(categories))
print(len(currencies))
print(len(brands))
print(len(names))
print(len(sizes))
print(len(prices))
print(len(images))

116
116
116
116
116
116
116


In [16]:
len(results)

60

In [29]:
sizes

['32"(81cm)',
 '40"(101cm)',
 '32"',
 '65"',
 '55"(140cm)',
 '50"(127cm)',
 '40"(101cm)',
 '55"(140cm)',
 '50"(126cm)',
 '100"',
 '58"',
 '24"',
 '40"',
 '50"',
 '32"',
 '40"',
 '55"',
 '65"',
 '77"',
 '75"',
 '55"',
 '75"',
 '75"',
 '65"',
 '50"',
 '43"',
 '43"',
 '75"',
 '65"',
 'Bezel',
 '65"',
 '55"',
 '65"',
 '32"',
 '43"',
 '50"',
 '55"',
 '75"',
 '82"',
 '55"',
 '65"',
 '75"',
 '55"',
 '65"',
 '75"',
 '85"',
 '55"',
 '65"',
 '75"',
 '32"',
 '40"',
 '85"',
 '65"',
 '75"',
 '55"',
 '50"',
 '43"',
 '49"',
 '32"(81cm)',
 '40"(101cm)',
 '32"',
 '65"',
 '55"(140cm)',
 '50"(127cm)',
 '40"(101cm)',
 '55"(140cm)',
 '50"(126cm)',
 '100"',
 '58"',
 '24"',
 '40"',
 '50"',
 '32"',
 '40"',
 '55"',
 '65"',
 '77"',
 '75"',
 '55"',
 '75"',
 '75"',
 '65"',
 '50"',
 '43"',
 '43"',
 '75"',
 '65"',
 'Bezel',
 '65"',
 '55"',
 '65"',
 '32"',
 '43"',
 '50"',
 '55"',
 '75"',
 '82"',
 '55"',
 '65"',
 '75"',
 '55"',
 '65"',
 '75"',
 '85"',
 '55"',
 '65"',
 '75"',
 '32"',
 '40"',
 '85"',
 '65"',
 '75"',
 '

In [17]:
print(names[0])
print(names[56])

Sony 32"(81cm) FHD LED LCD Smart TV
LG 43" UN7300 4K UHD SMART LED TV


In [18]:
print(names[58])
print(names[115])

IndexError: list index out of range

In [26]:
import pandas as pd

df = pd.DataFrame({
        "retailer": "Good Guys",
        "category": "Televisions",
        "currency": currencies,
        "brand": brands,
        "model": models,
        "name": names,
        "size": size,
        "price": prices,
        "image": images
})

df

Unnamed: 0,category,currency,brand,model,name,size,price,image
0,Televisions,AUD,Sony,KDL32W660E,"Sony 32""(81cm) FHD LED LCD Smart TV","49""",$595.00,//thegoodguys.sirv.com/products/50048259/50048...
1,Televisions,AUD,TCL,40D3000F,"TCL 40""(101cm) FHD LED LCD TV","49""",$445.00,//thegoodguys.sirv.com/products/50052593/50052...
2,Televisions,AUD,Hitachi,32HDSM8,"Hitachi 32"" HD Smart LED TV","49""",$299.00,//thegoodguys.sirv.com/products/50064547/50064...
3,Televisions,AUD,Hitachi,65UHDSM8,"Hitachi 65"" UHD Smart LED TV","49""",$999.00,//thegoodguys.sirv.com/products/50064544/50064...
4,Televisions,AUD,Hitachi,55UHDSM8,"Hitachi 55""(140cm) UHD HDR LED LCD Smart TV","49""",$695.00,//thegoodguys.sirv.com/products/50063085/50063...
...,...,...,...,...,...,...,...,...
111,Televisions,AUD,Samsung,QA75Q95TAWXXY,"Samsung 75"" Q95T 4K UHD SMART QLED TV","49""",$5995.00,//thegoodguys.sirv.com/products/50070386/50070...
112,Televisions,AUD,LG,55UN7300PTC,"LG 55"" UN7300 4K UHD SMART LED TV","49""",$1195.00,//thegoodguys.sirv.com/products/50070525/50070...
113,Televisions,AUD,LG,50UN7300PTC,"LG 50"" UN7300 4K UHD SMART LED TV","49""",$995.00,//thegoodguys.sirv.com/products/50070526/50070...
114,Televisions,AUD,LG,43UN7300PTC,"LG 43"" UN7300 4K UHD SMART LED TV","49""",$895.00,//thegoodguys.sirv.com/products/50070527/50070...


In [20]:
df.describe()

Unnamed: 0,category,currency,brand,model,name,size,price,image
count,58,58,58,58,58,58,58,58
unique,1,1,10,58,58,1,39,58
top,Televisions,AUD,Samsung,UA50TU8000WXXY,"Samsung 82"" TU8000 4K UHD SMART LED TV","49""",$699.00,//thegoodguys.sirv.com/products/50065351/50065...
freq,58,58,20,1,1,58,4,1


In [27]:
df[df.duplicated()] # Page not refreshing when input new URL - need to fix

Unnamed: 0,category,currency,brand,model,name,size,price,image
58,Televisions,AUD,Sony,KDL32W660E,"Sony 32""(81cm) FHD LED LCD Smart TV","49""",$595.00,//thegoodguys.sirv.com/products/50048259/50048...
59,Televisions,AUD,TCL,40D3000F,"TCL 40""(101cm) FHD LED LCD TV","49""",$445.00,//thegoodguys.sirv.com/products/50052593/50052...
60,Televisions,AUD,Hitachi,32HDSM8,"Hitachi 32"" HD Smart LED TV","49""",$299.00,//thegoodguys.sirv.com/products/50064547/50064...
61,Televisions,AUD,Hitachi,65UHDSM8,"Hitachi 65"" UHD Smart LED TV","49""",$999.00,//thegoodguys.sirv.com/products/50064544/50064...
62,Televisions,AUD,Hitachi,55UHDSM8,"Hitachi 55""(140cm) UHD HDR LED LCD Smart TV","49""",$695.00,//thegoodguys.sirv.com/products/50063085/50063...
63,Televisions,AUD,Hitachi,50UHDSM8,"Hitachi 50""(127cm) UHD LED LCD Smart TV","49""",$699.00,//thegoodguys.sirv.com/products/50063086/50063...
64,Televisions,AUD,Hitachi,40FHDSM8,"Hitachi 40""(101cm) FHD LED LCD Smart TV","49""",$399.00,//thegoodguys.sirv.com/products/50063088/50063...
65,Televisions,AUD,Philips,55PUT6103/79,"Philips 55""(140cm) UHD LED LCD Smart TV","49""",$699.00,//thegoodguys.sirv.com/products/50063013/50063...
66,Televisions,AUD,Philips,50PUT6103/79,"Philips 50""(126cm) UHD LED LCD Smart TV","49""",$599.00,//thegoodguys.sirv.com/products/50063014/50063...
67,Televisions,AUD,Hisense,H100LDA,"Hisense 100"" 4K UHD SMART LASER TV","49""",$9995.00,//thegoodguys.sirv.com/products/50068151/50068...


In [22]:
df.to_csv("output/good_guys.csv")