In [206]:
import requests
import urllib.request
import time
from bs4 import BeautifulSoup
import csv
from urllib.parse import urljoin, urlparse
import os

import shutil # to save the image locally

In [207]:
def get_response(url, timeout=30):
    """Download ``url`` and return its body parsed as a BeautifulSoup tree.

    Args:
        url: Absolute URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block forever on a stalled
            connection, which would hang the whole scrape.

    Returns:
        A ``BeautifulSoup`` object built from the response text using the
        built-in ``html.parser``.

    Raises:
        requests.exceptions.RequestException: if the request fails or the
            timeout expires.
    """
    # The HTTP status code is intentionally not checked: an error page is
    # parsed like any other page and simply yields no matching elements
    # in the callers' lookups.
    response = requests.get(url, timeout=timeout)
    return BeautifulSoup(response.text, "html.parser")

In [208]:
# Per-designer settings used by the rest of the notebook.
# Bare host name (no scheme); it is also what ends up in the 'Website' CSV column.
designer_root_url = 'www.leebroom.com'
# Listing page from which the individual product/exhibition pages are discovered.
designer_url = 'http://www.leebroom.com/exhibition/'
# Designer name, written to the 'Name' CSV column.
name = 'Lee Broom'
# Designer location, written to the 'Location' CSV column.
location = 'London, United Kingdom'
# Fetch and parse the listing page; this performs a network request when the cell runs.
soup = get_response(designer_url)

In [209]:
# Get all products urls:
def get_product_urls(soup, designer_url):
    """Collect the absolute URL of every project linked from a listing page.

    Args:
        soup: Parsed listing page. Every ``div`` whose class is
            ``"box ProjectPage GB"`` is expected to wrap one link.
        designer_url: URL of the listing page itself. Links are resolved
            against it, so root-relative, page-relative and already
            absolute ``href`` values all produce a valid URL.

    Returns:
        A list of absolute product-page URLs, in document order. Boxes
        that contain no link (or a link without ``href``) are skipped.
    """
    products_url = []
    rows = soup.findAll("div", {"class": "box ProjectPage GB"})

    for row in rows:
        link = row.find('a')
        href = link.get('href') if link is not None else None
        if not href:
            # A box without a usable link cannot be followed; skip it
            # instead of failing the whole listing.
            continue
        # urljoin replaces manual 'http://' + host + href concatenation,
        # which depended on a global and broke on non root-relative hrefs.
        products_url.append(urljoin(designer_url, href))

    print(len(products_url))
    print(products_url)
    return products_url

In [210]:
def get_image_list(soup, base_url=None):
    """Return the absolute URL of every image inside the page's wrapper div(s).

    Args:
        soup: Parsed product page. Images are looked up inside every
            ``div`` whose id is ``"wrapper"``.
        base_url: URL that relative ``src`` values are resolved against.
            Defaults to ``'http://' + designer_root_url + '/'``, i.e. the
            site root built from the notebook-level ``designer_root_url``.

    Returns:
        A list of absolute image URLs in document order. ``img`` tags
        without a ``src`` attribute are skipped.
    """
    if base_url is None:
        # Preserve the original behaviour of resolving against the site root.
        base_url = 'http://' + designer_root_url + '/'

    image_list = []
    for wrapper in soup.findAll("div", {"id": "wrapper"}):
        for img in wrapper.findAll('img'):
            src = img.get('src')
            if not src:
                continue
            # urljoin keeps already-absolute sources intact and avoids the
            # double slash that plain concatenation produced for sources
            # starting with '/'.
            image_list.append(urljoin(base_url, src))

    total_images = len(image_list)
    print(total_images)
    print(image_list)
    return image_list

In [211]:
#Writing details to csv

def write_to_csv(details, csv_file="product_details.csv"):
    """Append product rows to a CSV file.

    Args:
        details: Iterable of dicts keyed by the column names listed in
            ``csv_columns`` below.
        csv_file: Path of the CSV file to append to. Defaults to
            ``product_details.csv`` in the current working directory.

    Behaviour:
        * The header row is written only when the file does not exist yet
          or is empty, so repeated appends never duplicate it.
        * The file is opened with ``newline=''`` (required by the ``csv``
          module so embedded newlines and row terminators are handled
          correctly) and as UTF-8 so non-ASCII descriptions do not depend
          on the platform default encoding.
        * On an I/O failure ``"I/O error"`` is printed and the success
          message is suppressed.

    Returns:
        None.
    """
    csv_columns = ['Name', 'Website', 'Location', 'Year', 'ProductName', 'Description', 'Total_images']
    try:
        needs_header = not os.path.isfile(csv_file) or os.path.getsize(csv_file) == 0
        with open(csv_file, 'a', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
            if needs_header:
                writer.writeheader()
            writer.writerows(details)
    except IOError:
        print("I/O error")
        # Do not fall through to the success message after a failed write.
        return

    print("Written successfully")

In [212]:
## Download images

def download_image(img_list, dir_name, product_name, timeout=30):
    """Download every image in ``img_list`` into ``dir_name``.

    Files are named ``<product_name><index>.jpg`` where ``index`` is the
    position of the URL in ``img_list``.

    Args:
        img_list: Iterable of absolute image URLs.
        dir_name: Target directory; created (with parents) if missing.
        product_name: Prefix for the saved file names. Surrounding
            whitespace is stripped and path separators are replaced with
            ``_`` so the name cannot point outside ``dir_name``.
        timeout: Seconds to wait for each request before giving up.

    Returns:
        None. One status line is printed per image.

    Raises:
        requests.exceptions.RequestException: if a request fails or
            times out.
    """
    # Use the directory the caller asked for. The previous code overwrote
    # it with ``names[0]``, a name that is not defined anywhere in the
    # notebook and only worked thanks to leftover kernel state.
    os.makedirs(dir_name, exist_ok=True)
    safe_name = product_name.strip().replace('/', '_').replace(os.sep, '_')

    for i, image_url in enumerate(img_list):
        filename = os.path.join(dir_name, safe_name + str(i) + '.jpg')
        # The context manager releases the streamed connection even when
        # the body is never read (non-200) or writing the file fails.
        with requests.get(image_url, stream=True, timeout=timeout) as r:
            if r.status_code == 200:
                # Set decode_content value to True, otherwise the downloaded image file's size will be zero.
                r.raw.decode_content = True

                # Open a local file with wb ( write binary ) permission.
                with open(filename, 'wb') as f:
                    shutil.copyfileobj(r.raw, f)
                print('Image sucessfully Downloaded: ', filename)
            else:
                print('Image Couldn\'t be retreived: ', filename)

In [213]:
# Main Function

def _parse_title(title):
    """Split a page heading into ``(product_name, year)``.

    Rules (the first matching separator wins, '-' before ','):
        * ``"A - B C 2018"`` -> name is the text before the dash plus the
          first word after it, year is the last word after it.
        * ``"A, B 2018"``    -> name is the text before the comma, year is
          the last word after it.
        * no separator, or nothing after the separator -> the name is the
          text before the separator (or the whole title) and the year is
          ``'Notfound'``.
    """
    for separator in ('-', ','):
        if separator not in title:
            continue
        parts = title.split(separator)
        tail_words = parts[1].split()
        if not tail_words:
            # Nothing usable follows the separator (e.g. "Title -").
            return parts[0], 'Notfound'
        if separator == '-':
            return parts[0] + tail_words[0], tail_words[-1]
        return parts[0], tail_words[-1]
    return title, 'Notfound'


def get_product_details(root_soup, designer_url, name, designer_root_url, location):
    """Scrape every product page of a designer, log it to CSV and save its images.

    For each product URL found on the listing page this fetches the page,
    derives the product name and year from the ``h1`` inside each
    ``ProductInfo`` div, concatenates the text of its paragraphs into a
    description, appends one row via ``write_to_csv`` and downloads the
    page's images via ``download_image``.

    Args:
        root_soup: Parsed listing page.
        designer_url: URL of the listing page.
        name: Designer name; stored in the 'Name' column and passed to
            ``download_image`` as the target directory.
        designer_root_url: Stored in the 'Website' column.
        location: Stored in the 'Location' column.

    Returns:
        None.
    """
    products_url = get_product_urls(root_soup, designer_url)

    for url in products_url:
        # Get product details
        print("processing : ", url)
        soup = get_response(url)

        # Reset per page: previously these were only assigned inside the
        # loop below, so a page without a ProductInfo heading either
        # crashed (first page) or silently reused the previous product's
        # name and year.
        product_name = None
        year = 'Notfound'
        para_text = ""

        for row in soup.findAll("div", {"class": "ProductInfo"}):
            heading = row.find('h1')
            if heading is not None:
                product_name, year = _parse_title(heading.text)
                print(product_name)
                print(year)
            for para in row.findAll("p"):
                para_text += para.text

        if product_name is None:
            # Without a title there is nothing sensible to record or to
            # name the image files after.
            print("skipping, no product title found : ", url)
            continue

        # Get image list
        image_list = get_image_list(soup)

        # Save the details to csv
        details = [{"Name": name, "Website": designer_root_url, "Location": location,
                    'Year': year, 'ProductName': product_name, 'Description': para_text,
                    'Total_images': len(image_list)}]

        write_to_csv(details)
        print("written to csv successfully")

        # Download the images
        download_image(image_list, name, product_name)
        print("images downloaded successfully")
        


In [214]:
# Run the complete scrape for the designer configured earlier in the notebook.
get_product_details(
    root_soup=soup,
    designer_url=designer_url,
    name=name,
    designer_root_url=designer_root_url,
    location=location,
)

16
['http://www.leebroom.com/exhibition/kaleidoscopia-london-design-festival-2019/', 'http://www.leebroom.com/exhibition/nycxdesign-new-york-design-week-2019/', 'http://www.leebroom.com/exhibition/park-life-australia-and-asia-tour-2019/', 'http://www.leebroom.com/exhibition/observatory-london-design-festival-2018/', 'http://www.leebroom.com/exhibition/observatory-milan-and-new-york-design-weeks-2018/', 'http://www.leebroom.com/exhibition/on-reflection-london-design-festival-2017/', 'http://www.leebroom.com/exhibition/time-machine-milan-design-week-2017/', 'http://www.leebroom.com/exhibition/opticality-london-design-festival-2016/', 'http://www.leebroom.com/exhibition/salone-del-automobile-milan-design-week-2016/', 'http://www.leebroom.com/exhibition/lee-broom-new-york-store/', 'http://www.leebroom.com/exhibition/the-flower-shop/', 'http://www.leebroom.com/exhibition/the-department-store/', 'http://www.leebroom.com/exhibition/nouveau-rebel-london-design-festival-2014/', 'http://www.leeb

Image sucessfully Downloaded:  Lee Broom/Observatory London0.jpg
Image sucessfully Downloaded:  Lee Broom/Observatory London1.jpg
Image sucessfully Downloaded:  Lee Broom/Observatory London2.jpg
Image sucessfully Downloaded:  Lee Broom/Observatory London3.jpg
Image sucessfully Downloaded:  Lee Broom/Observatory London4.jpg
Image sucessfully Downloaded:  Lee Broom/Observatory London5.jpg
Image sucessfully Downloaded:  Lee Broom/Observatory London6.jpg
Image sucessfully Downloaded:  Lee Broom/Observatory London7.jpg
Image sucessfully Downloaded:  Lee Broom/Observatory London8.jpg
Image sucessfully Downloaded:  Lee Broom/Observatory London9.jpg
Image sucessfully Downloaded:  Lee Broom/Observatory London10.jpg
Image sucessfully Downloaded:  Lee Broom/Observatory London11.jpg
images downloaded successfully
processing :  http://www.leebroom.com/exhibition/observatory-milan-and-new-york-design-weeks-2018/
Observatory Milan
2018
11
['http://www.leebroom.com/assets/Uploads/_resampled/croppedima

Image sucessfully Downloaded:  Lee Broom/Opticality London0.jpg
Image sucessfully Downloaded:  Lee Broom/Opticality London1.jpg
Image sucessfully Downloaded:  Lee Broom/Opticality London2.jpg
Image sucessfully Downloaded:  Lee Broom/Opticality London3.jpg
Image sucessfully Downloaded:  Lee Broom/Opticality London4.jpg
Image sucessfully Downloaded:  Lee Broom/Opticality London5.jpg
Image sucessfully Downloaded:  Lee Broom/Opticality London6.jpg
Image sucessfully Downloaded:  Lee Broom/Opticality London7.jpg
Image sucessfully Downloaded:  Lee Broom/Opticality London8.jpg
Image sucessfully Downloaded:  Lee Broom/Opticality London9.jpg
Image sucessfully Downloaded:  Lee Broom/Opticality London10.jpg
Image sucessfully Downloaded:  Lee Broom/Opticality London11.jpg
Image sucessfully Downloaded:  Lee Broom/Opticality London12.jpg
Image sucessfully Downloaded:  Lee Broom/Opticality London13.jpg
images downloaded successfully
processing :  http://www.leebroom.com/exhibition/salone-del-automobil

Image sucessfully Downloaded:  Lee Broom/The Flower Shop0.jpg
Image sucessfully Downloaded:  Lee Broom/The Flower Shop1.jpg
Image sucessfully Downloaded:  Lee Broom/The Flower Shop2.jpg
Image sucessfully Downloaded:  Lee Broom/The Flower Shop3.jpg
Image sucessfully Downloaded:  Lee Broom/The Flower Shop4.jpg
Image sucessfully Downloaded:  Lee Broom/The Flower Shop5.jpg
Image sucessfully Downloaded:  Lee Broom/The Flower Shop6.jpg
Image sucessfully Downloaded:  Lee Broom/The Flower Shop7.jpg
Image sucessfully Downloaded:  Lee Broom/The Flower Shop8.jpg
Image sucessfully Downloaded:  Lee Broom/The Flower Shop9.jpg
Image sucessfully Downloaded:  Lee Broom/The Flower Shop10.jpg
Image sucessfully Downloaded:  Lee Broom/The Flower Shop11.jpg
Image sucessfully Downloaded:  Lee Broom/The Flower Shop12.jpg
Image sucessfully Downloaded:  Lee Broom/The Flower Shop13.jpg
Image sucessfully Downloaded:  Lee Broom/The Flower Shop14.jpg
images downloaded successfully
processing :  http://www.leebroom.

Image sucessfully Downloaded:  Lee Broom/Crystal Bulb Shop0.jpg
Image sucessfully Downloaded:  Lee Broom/Crystal Bulb Shop1.jpg
Image sucessfully Downloaded:  Lee Broom/Crystal Bulb Shop2.jpg
Image sucessfully Downloaded:  Lee Broom/Crystal Bulb Shop3.jpg
Image sucessfully Downloaded:  Lee Broom/Crystal Bulb Shop4.jpg
Image sucessfully Downloaded:  Lee Broom/Crystal Bulb Shop5.jpg
Image sucessfully Downloaded:  Lee Broom/Crystal Bulb Shop6.jpg
Image sucessfully Downloaded:  Lee Broom/Crystal Bulb Shop7.jpg
images downloaded successfully
processing :  http://www.leebroom.com/exhibition/public-house-milan-2012/
Public House
2012
16
['http://www.leebroom.com/assets/Uploads/_resampled/croppedimage680825-Public-house-2.jpg', 'http://www.leebroom.com/assets/Uploads/_resampled/croppedimage680825-Public-house-3.jpg', 'http://www.leebroom.com/assets/Uploads/_resampled/croppedimage680825-Public-house-8.jpg', 'http://www.leebroom.com/assets/Uploads/_resampled/croppedimage680825-Public-house-12.jp