# Import packages

In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys

from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By

from selenium.webdriver.chrome.options import Options

import time

# STATICS

In [None]:
# Entry page of the vendor's download center.
VENDOR_URL = 'https://www.synology.com/en-global/support/download'
# CSS selector of the product-type (product line) dropdown.
PRODUCT_TYPE_SELECTOR = 'div.margin_bottom20 > select:nth-child(1)'
# XPath of the product dropdown that belongs to the chosen product type.
PRODUCT_SELECTOR = '//*[@id="heading_bg"]/div/div/div[2]/select'
# XPath of the element whose text is read as the newest OS version.
NEWEST_OS_SELECTOR = '//*[@id="results"]/div[3]/div[2]/div[1]/div/div[1]/div[1]/div/div/div[1]'

# Chrome options shared by the scraper: headless mode and a fixed download directory.
options = Options()
options.headless = True
options.add_experimental_option(
    "prefs",
    {"download.default_directory": r"/Users/kiril/Downloads/selenium_downloads"},
)


# Initialize Chrome and open Vendor Website

In [None]:
class Synology_scraper:
    """Selenium-driven scraper for the Synology download center.

    Typical use: construct, ``open_website()``, ``create_product_catalog()``,
    then ``choose_product_line()`` / ``choose_product()`` per product.
    The XPaths are tied to the current page layout and will break when the
    vendor changes its markup.
    """

    # Download link of the first (newest) result entry.
    _DOWNLOAD_LINK_XPATH = '//*[@id="results"]/div[3]/div[2]/div[1]/div/div[1]/div[1]/div/div/div[3]/div/div/div/div[1]/a'
    # Link whose ``title`` attribute carries the MD5 checksum of that entry.
    _MD5_LINK_XPATH = '//*[@id="results"]/div[3]/div[2]/div[1]/div/div[1]/div[1]/div/div/div[3]/div/div/div/div[2]/div[2]/div/a'

    def __init__(
        self,
        url: str,
        headless: bool,
        options: Options,
    ):
        """Start a Chrome session.

        Args:
            url: Page loaded by ``open_website()``.
            headless: Accepted for compatibility but not applied; the
                effective mode is whatever ``options`` already carries.
            options: Chrome options handed to ``webdriver.Chrome``.
        """
        print(f"headless: {options.headless}")
        self.driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
        self.url = url
        print('Initialized successfully')

    def open_website(self):
        """Load ``self.url``; a failure is printed instead of raised."""
        try:
            self.driver.get(self.url)
        except Exception as exc:
            # Best-effort by design, but no longer swallows KeyboardInterrupt
            # and no longer hides the reason.
            print(f'nix: {exc}')
        else:
            print('Opened Website')

    def create_product_catalog(self) -> None:
        """Build ``self.product_catalog`` as ``{product line: [product names]}``.

        Walks every entry of the product-type dropdown and reads the product
        dropdown that appears for it. The first option of each dropdown is
        skipped (presumably the "please select" placeholder).
        """
        type_select = Select(self.driver.find_element(By.CSS_SELECTOR, value=PRODUCT_TYPE_SELECTOR))

        # Read all names up front: selecting an entry may re-render the page.
        product_lines = [elem.text for elem in type_select.options[1:]]

        product_catalog = {}
        for product_line in product_lines:
            type_select.select_by_visible_text(product_line)
            product_select = Select(self.driver.find_element(By.XPATH, value=PRODUCT_SELECTOR))
            product_catalog[product_line] = [elem.text for elem in product_select.options[1:]]
        print('created product_catalog')
        self.product_catalog = product_catalog

    def choose_product_line(self, product_line: str) -> None:
        """Select ``product_line`` (visible text) in the product-type dropdown.

        Raises whatever Selenium raises when the dropdown or the entry is
        missing.
        """
        type_select = Select(self.driver.find_element(By.CSS_SELECTOR, value=PRODUCT_TYPE_SELECTOR))
        type_select.select_by_visible_text(product_line)
        # Kept from the original: fails early if the product dropdown is absent.
        Select(self.driver.find_element(By.XPATH, value=PRODUCT_SELECTOR))

    def choose_product(self, product: str) -> "tuple[str, str, str]":
        """Select ``product`` and read the newest release shown for it.

        Returns:
            ``(md5_checksum, newest_os_version_text, current_url)``.

        Raises whatever Selenium raises when an element cannot be found.
        """
        self.driver.implicitly_wait(10)
        time.sleep(1)
        product_select = Select(self.driver.find_element(By.XPATH, value=PRODUCT_SELECTOR))
        product_select.select_by_visible_text(product)
        # Short wait for the result lookup; note the driver keeps this
        # 1-second implicit wait afterwards.
        self.driver.implicitly_wait(1)
        newest_os = self.driver.find_element(By.XPATH, value=NEWEST_OS_SELECTOR)

        return self.get_MD5_checksum(), newest_os.text, self.driver.current_url

    def download_product(self, product: "str | None" = None) -> bool:
        """Click the download link of the first result entry.

        Args:
            product: Unused; kept so existing calls keep working.

        Returns:
            True if the link was found and clicked, False otherwise.
        """
        try:
            link = self.driver.find_element(By.XPATH, value=self._DOWNLOAD_LINK_XPATH)
            link.click()
        except Exception:
            return False
        return True

    def get_MD5_checksum(self) -> str:
        """Return the MD5 checksum taken from the checksum link's ``title``."""
        el = self.driver.find_element(By.XPATH, value=self._MD5_LINK_XPATH)
        # The title ends with a tooltip hint that is not part of the checksum.
        return el.get_attribute('title').replace('\n(Copy to Clipboard)', '')

In [None]:
# Create the scraper; the constructor launches Chrome with the module-level `options`.
# NOTE(review): `headless=False` is passed, but __init__ never reads `headless`;
# the effective mode comes from `options.headless`, which is set to True above.
Syn = Synology_scraper(VENDOR_URL, headless=False, options=options)

In [None]:
# Load VENDOR_URL in the browser; open_website() prints a message instead of raising on failure.
Syn.open_website()

In [None]:
# Takes about 5 seconds (author's note). Fills Syn.product_catalog with
# {product line: [product names]} by walking both dropdowns.
Syn.create_product_catalog()

In [None]:
# Select the 'NAS' entry in the product-type dropdown.
Syn.choose_product_line('NAS')

In [None]:
# Select model 'RS408'; the returned (MD5 checksum, newest OS version text, current URL)
# tuple is shown as the cell output.
Syn.choose_product('RS408')

In [None]:
# choose_product returns: MD5 checksum, newest DSM (OS) version, and the current URL

In [None]:
from tqdm import tqdm
import pandas as pd

# Scrape every product of every product line into one table.
# Rows are collected in a plain list and turned into a DataFrame once:
# DataFrame.append was removed in pandas 2.0 and copied the frame on every call.
result_columns = ['vendor', 'product_line', 'product', 'MD5', 'DSM', 'url', 'downloaded', 'exception_e']
rows = []

try:
    for product_line, products in Syn.product_catalog.items():
        Syn.choose_product_line(product_line)

        # Hand tqdm the list itself so it knows the total and can show progress/ETA.
        for product in tqdm(products):
            print(product_line, product)
            try:
                md5, dsm, url = Syn.choose_product(f'{product}')
                error = ''
            except Exception as e:
                # Keep going: record the failure for this product in the table.
                md5 = dsm = url = ''
                error = str(e)
            # 'downloaded' stays 'NotImplemented' until the download step exists.
            rows.append(['Synology', product_line, str(product), md5, dsm, url, 'NotImplemented', error])
finally:
    # Built in `finally` so an interrupted run still leaves the rows gathered so far.
    result_df = pd.DataFrame(rows, columns=result_columns)


In [None]:
result_df

In [None]:
# Persist the scrape results as CSV; with pandas' defaults the row index is written as the first column.
# NOTE(review): absolute, user-specific path — the target directory must already exist.
result_df.to_csv('/Users/kiril/Downloads/selenium_downloads/everyone.csv')

In [None]:
! open .

In [None]:
! open /Users/kiril/Downloads/selenium_downloads/test2_waiting.csv

In [None]:
from tqdm import tqdm
# Dry run of the progress bar: one second per product of the product line
# left over in `product_line` from the scrape loop above.
# tqdm gets the list directly (not an enumerate object) so it can show total and ETA.
for product in tqdm(Syn.product_catalog[product_line]):
    time.sleep(1)

# functions

In [None]:
def initialize_webdriver() -> bool:
    """Start Chrome and publish it as the module-level ``driver``.

    The sibling helpers in this section all read the global ``driver``;
    binding it to a local here would discard the session.

    Returns:
        True once the driver has been created.
    """
    global driver
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    return True

def open_website() -> bool:
    """Load ``VENDOR_URL`` in the global ``driver``.

    Returns:
        True when navigation completed without raising.

    Raises whatever Selenium raises if navigation fails.
    """
    driver.get(VENDOR_URL)
    return True



def choose_product_line(product_line: str) -> list:
    """Select a product line and list the products offered for it.

    Args:
        product_line: Visible text of the entry in the product-type dropdown.

    Returns:
        Texts of the product dropdown's options, without the first entry
        (presumably the "please select" placeholder).
    """
    type_select = Select(driver.find_element(By.CSS_SELECTOR, value=PRODUCT_TYPE_SELECTOR))
    type_select.select_by_visible_text(product_line)
    product_select = Select(driver.find_element(By.XPATH, value=PRODUCT_SELECTOR))
    return [elem.text for elem in product_select.options[1:]]

def choose_product(product_catalog: dict, product: str) -> None:
    """Select ``product`` and print its MD5 checksum and newest OS version.

    This is a module-level function, so it uses the global ``driver`` like
    its siblings. The former leading ``self`` parameter made the call
    ``choose_product(product_catalog, product)`` bind the catalog to ``self``.

    Args:
        product_catalog: Unused; kept so existing calls keep working.
        product: Visible text of the entry in the product dropdown.
    """
    product_select = Select(driver.find_element(By.XPATH, value=PRODUCT_SELECTOR))
    driver.implicitly_wait(2)
    product_select.select_by_visible_text(product)
    # Element whose text is the newest OS version.
    newest_os = driver.find_element(By.XPATH, value=NEWEST_OS_SELECTOR)

    print(f'MD5: {get_MD5_checksum()}')
    print(f"Newest OS Version: {newest_os.text}")
    
    
def download_product(product_catalog: "dict | None" = None, product: "str | None" = None) -> bool:
    """Click the download link of the first result entry.

    Args:
        product_catalog: Unused; kept so existing calls keep working.
        product: Unused; kept so existing calls keep working.

    Returns:
        True after the link has been clicked.

    Raises whatever Selenium raises when the link cannot be found or clicked.
    """
    link = driver.find_element(By.XPATH, value='//*[@id="results"]/div[3]/div[2]/div[1]/div/div[1]/div[1]/div/div/div[3]/div/div/div/div[1]/a')
    link.click()
    return True
    
def get_MD5_checksum() -> str:
    """Return the MD5 checksum read from the checksum link's ``title`` attribute."""
    checksum_xpath = '//*[@id="results"]/div[3]/div[2]/div[1]/div/div[1]/div[1]/div/div/div[3]/div/div/div/div[2]/div[2]/div/a'
    checksum_link = driver.find_element(By.XPATH, value=checksum_xpath)
    title = checksum_link.get_attribute('title')
    # Drop the tooltip hint that follows the checksum.
    return title.replace('\n(Copy to Clipboard)', '')

In [None]:
# Collect all product lines and their products in one dictionary and display it.
# NOTE(review): no module-level create_product_catalog() is defined in the visible code
# (only the Synology_scraper method of that name), so this call presumably raises
# NameError — confirm against the rest of the notebook.
product_catalog = create_product_catalog()
product_catalog

In [None]:
# Select the 'Network' product line; the returned list of product names is the cell output.
choose_product_line('Network')

In [None]:
# Only works properly in the Jupyter notebook when the page has also been loaded visually :-)
# This might be caused by Jupyter notebook itself.
product = 'RT6600ax'
choose_product(product_catalog, product)
# Output is the MD5 checksum and the newest available OS version.

# start local download

In [None]:
# TODO: add an argument for the download folder (this should really come from a Chrome config file).
# Clicks the download link of the currently shown product.
download_product(product_catalog, product)

# compare MD5 checksum 

# close browser

In [None]:
# End the browser session held in the global `driver`.
driver.quit()

# QNAP test with refactoring (only partly working so far)

In [None]:
# Start a fresh Chrome session with default options and open the QNAP
# download page (URL requests model hs-210, category firmware).
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.get("https://www.qnap.com/en-me/download?model=hs-210&category=firmware")

In [None]:
# Locate the dropdown addressed by this XPath and wrap it in Selenium's Select helper.
el = driver.find_element(By.XPATH, value='//*[@id="download_center"]/div/div[1]/div/div[1]/div[1]/div/select')
sel = Select(el)

In [None]:
# One key per dropdown entry; every value starts out as None.
product_catalog2 = dict.fromkeys(option.text for option in sel.options)

In [None]:
product_catalog2

In [None]:
def find_options_in_select(XPATH_str: str, skip: int = 0) -> "tuple[Select, list]":
    """Locate a dropdown by XPath and read the texts of its options.

    Args:
        XPATH_str: XPath of the ``<select>`` element, looked up on the
            global ``driver``.
        skip: Number of leading options to drop (the first entry is sometimes
            a "please select product type" placeholder).

    Returns:
        ``(select, texts)``: the ``Select`` wrapper and the list of option
        texts after skipping.
    """
    select = Select(driver.find_element(By.XPATH, value=XPATH_str))
    return select, [elem.text for elem in select.options[skip:]]

In [None]:
# Start from an empty catalog, then create one key (value None) per entry of the dropdown.
product_catalog = dict()
s, l = find_options_in_select('//*[@id="download_center"]/div/div[1]/div/div[1]/div[1]/div/select')
product_catalog = dict.fromkeys(l)

In [None]:
# Print each catalog key and select it in the dropdown held in `s`.
for key in product_catalog:
    print(key)
    s.select_by_visible_text(key)
    # TODO: product_catalog[key] = <entries offered for this key>

In [None]:
# Select each entry of `l` in `s`, then re-read the second dropdown (div[2]) without its first option.
# NOTE(review): `s` and `l` are rebound inside the loop. The iteration continues over the
# original list, but from the second pass on `s` is the second dropdown while `element`
# still comes from the first list — confirm this is intended.
for element in l:
    s.select_by_visible_text(element)
    s, l = find_options_in_select('//*[@id="download_center"]/div/div[1]/div/div[1]/div[2]/div/select', skip=1)

In [None]:
# Read the second dropdown (div[2]) and its option texts, skipping the first entry.
s, l = find_options_in_select('//*[@id="download_center"]/div/div[1]/div/div[1]/div[2]/div/select', skip=1)

In [None]:
# Select the first entry of the list read above (raises IndexError if the list is empty).
s.select_by_visible_text(l[0])

In [None]:
# Read the dropdown with id "model-select" and its option texts, skipping the first entry.
s, l = find_options_in_select('//*[@id="model-select"]', skip=1)

In [None]:
l

In [None]:
# For every key of product_catalog2: select it in `sel`, then store the option texts
# (without the first entry) of `selector_products` as its value.
# NOTE(review): `selector_products` is never assigned at module level in the visible code
# (only as a local inside functions), so this loop presumably raises NameError — confirm.
for product in product_catalog2.keys():
    sel.select_by_visible_text(product)
    
    product_catalog2[product] = [elem.text for elem in selector_products.options[1:]]

In [None]:
for key, item in product_catalog2.items():
    print(key)
    if key == 'NAS / Expansion':
        print(item)
    else:
        