## Web Scraping Project 
by 
#### Ameenah Al-Haidari - Sep 10, 2023

# Grand-Seiko

![grandseiko1.png](attachment:grandseiko1.png)

### import Libraries

In [65]:
import pandas as pd
import csv
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [66]:
# Initialize the web driver
driver = webdriver.Chrome()  # You can use other browsers too
wait = WebDriverWait(driver, 10)  # Set a maximum wait time for locating elements

In [67]:
# Open the initial page
# url = "https://www.grand-seiko.com/ca-en/collections/all?page=1"
url = "https://www.grand-seiko.com/ca-en/collections/all?page=7"
driver.get(url)

In [68]:
# Scroll to load all watches
SCROLL_PAUSE_TIME = 2

last_height = driver.execute_script("return document.body.scrollHeight")

while True:
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(SCROLL_PAUSE_TIME)
    new_height = driver.execute_script("return document.body.scrollHeight")
    if new_height == last_height:
        break
    last_height = new_height

In [69]:
# Find and extract watch links
watch_links = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'a.productCard')))
watch_urls = [link.get_attribute("href") for link in watch_links]

In [70]:
# Initialize CSV file for saving data
csv_filename = "AAgrandseiko_result.csv"
csv_headers = ["url", "Limited edition", "Reference Number", "Date", "collection", "Description", "Exterior:",
               "Case back:", "Glass", "Glass Material:", "Glass Coating:", "Case size:", "Band width:", "Band Material:", "Clasp type:","Movement", "Caliber no.:", "Accuracy:", "Functions", "Water resistance:", "Magnetic resistance:",
               "Other details / Features:", "url of images"]

In [71]:
with open(csv_filename, "w", newline="", encoding="utf-8") as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerow(csv_headers)

    # Iterate through each watch page
    for watch_url in watch_urls:
        driver.get(watch_url)
        watch_data = {"url": watch_url}
        try:
            watch_data["Reference Number"] = driver.find_element(By.CSS_SELECTOR, "h1._title").text
        except Exception as e:
            watch_data["Reference Number"] = "N/A"

            
        try:
            watch_data["Date"] = driver.find_element(By.CSS_SELECTOR, "p._arrival").text
        except Exception as e:
            watch_data["Date"] = "N/A"
            
        try:
            watch_data["Limited edition"] = driver.find_element(By.XPATH,
                                                                "//span[contains(text(), 'Limited')]")
            watch_data['Limited edition'] = watch_data['Limited edition'].text
        except Exception as e:
                watch_data["Limited edition"] = "N/A"
                
        try:
            watch_data["collection"] = driver.find_element(By.CSS_SELECTOR, "p._collection").text
        except Exception as e:
            watch_data["collection"] = "N/A"
            
        try:
            watch_data["Description"] = driver.find_element(By.CSS_SELECTOR, "p._shoulder").text
        except Exception as e:
            watch_data["Description"] = "N/A"


        table_rows = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'table._table tr')))
        for row in table_rows:
            value = row.find_element(By.TAG_NAME, 'td').text.strip()
            try:
                if row.find_element(By.TAG_NAME, 'th').text.strip():
                    header = row.find_element(By.TAG_NAME, 'th').text.strip()
                watch_data[header] = value
            except Exception as e:
                watch_data[header] += value

        # ... continue extracting other features
        image_urls = [img.get_attribute("src") for img in
                      driver.find_elements(By.CSS_SELECTOR, 'img[loading="lazy"]')]
        watch_data["url of images"] = ", ".join(image_urls)
        # Write the extracted data to the CSV file
        csv_writer.writerow([watch_data.get(header, "") for header in csv_headers])

# Close the web driver
driver.quit()


In [75]:
pd.read_csv("AAgrandseiko_result.csv")

Unnamed: 0,url,Limited edition,Reference Number,Date,collection,Description,Exterior:,Case back:,Glass,Glass Material:,...,Band Material:,Clasp type:,Movement,Caliber no.:,Accuracy:,Functions,Water resistance:,Magnetic resistance:,Other details / Features:,url of images
0,https://www.grand-seiko.com/ca-en/collections/...,"Limited edition of 1,700 pcs",SBGM253,Oct. 2023 Release,Elegance Collection,Automatic 3-Day Power Reserve GMT\nCaliber 9S ...,Stainless steel,See-through case backCase back with 6 screws,,Box shaped sapphire crystal,...,,Three-fold clasp with push button release,,9S66 Instructions,,,Splash Resistant,"4,800 A/m",LIMITED EDITION on the case back\nSerial numbe...,https://www.grand-seiko.com/ca-en/-/media/Imag...
1,https://www.grand-seiko.com/ca-en/collections/...,"Limited edition of 2,000 pcs",SBGJ275,Sep. 2023 Release,Sport Collection,Mechanical Hi-Beat 36000 GMT\nCaliber 9S 25th ...,"Stainless steel, Caseback:Stainless steel and ...",See-through & Screw case back,,Dual-curved sapphire crystal,...,,Three-fold clasp with push button release,,9S86 Instructions,,,20 bar,"4,800 A/m",Rotating bezel\nScrew-down crown\n37 jewels\n2...,https://www.grand-seiko.com/ca-en/-/media/Imag...
2,https://www.grand-seiko.com/ca-en/collections/...,,STGK019,,Elegance Collection,,Stainless steel,See-through & Screw case back,,Dual-curved sapphire crystal,...,,,,9S27 Instructions,,,10 bar,"4,800 A/m",Case with diamond(s)\nDial with diamond(s)\n35...,https://www.grand-seiko.com/ca-en/-/media/Imag...
3,https://www.grand-seiko.com/ca-en/collections/...,,SBGA481,,Sport Collection,,High-intensity titanium,See-through & Screw case back,,Dual-curved sapphire crystal,...,,Three-fold clasp with secure lock & push butto...,,9R65 Instructions,±15 seconds per month (±1 second per day),,20 bar,"4,800 A/m",Screw-down crown\nThree-fold clasp with secure...,https://www.grand-seiko.com/ca-en/-/media/Imag...
4,https://www.grand-seiko.com/ca-en/collections/...,,STGK021,,Elegance Collection,,Stainless steel,See-through & Screw case back,,Dual-curved sapphire crystal,...,,Three-fold clasp with push button release,,9S27 Instructions,,,10 bar,,Case with diamond(s)\n35 jewels,https://www.grand-seiko.com/ca-en/-/media/Imag...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114,https://www.grand-seiko.com/ca-en/collections/...,,SBGM221,,Elegance Collection,,Stainless steel,See-through case backCase back with 6 screws,,Box shaped sapphire crystal,...,Crocodile leather,Three-fold clasp with push button release,,9S66 Instructions,,,Splash Resistant,"4,800 A/m",35 jewels\n24-hour hand (dual time display fun...,https://www.grand-seiko.com/ca-en/-/media/Imag...
115,https://www.grand-seiko.com/ca-en/collections/...,,SBGW231,,Elegance Collection,,Stainless steel,See-through case backCase back with 6 screws,,Box shaped sapphire crystal,...,Crocodile leather,,,9S64 Instructions,,,Splash Resistant,"4,800 A/m",24 jewels,https://www.grand-seiko.com/ca-en/-/media/Imag...
116,https://www.grand-seiko.com/ca-en/collections/...,,SBGR261,,Elegance Collection,,Stainless steel,See-through case backCase back with 6 screws,,Box shaped sapphire crystal,...,Crocodile leather,Three-fold clasp with push button release,,9S65 Instructions,,,Splash Resistant,"4,800 A/m",35 jewels,https://www.grand-seiko.com/ca-en/-/media/Imag...
117,https://www.grand-seiko.com/ca-en/collections/...,,SBGA413,,Heritage Collection,,High-intensity titanium,See-through & Screw case back,,Box shaped sapphire crystal,...,,Three-fold clasp with push button release,,9R65 Instructions,±15 seconds per month (±1 second per day),,10 bar,"4,800 A/m",Screw-down crown\nAllergy-safe metal\n30 jewel...,https://www.grand-seiko.com/ca-en/-/media/Imag...


### the End