In [75]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import requests
import re
from matplotlib import pyplot as plt
import time
import urllib
import spacy
from collections import defaultdict
import nltk
from nltk.tokenize import word_tokenize
from nltk.tokenize import RegexpTokenizer
import string
import pandas as pd

# Libraries required for Google Drive
from googleapiclient.http import MediaFileUpload, MediaIoBaseUpload
from pydrive2.auth import GoogleAuth
from pydrive2.drive import GoogleDrive
from pydrive2.files import GoogleDriveFile
import mimetypes
import magic


In [2]:
def construct_url_params(url, kwargs):
    """
        Construct a new url by appending query params to the url.

        Args:
            url: base url; it may already contain a query string.
            kwargs: mapping of query parameter name -> value. Names and values
                are converted to strings and percent-encoded.

        Returns: new url. The url is returned unchanged when kwargs is empty.
    """
    # Imported locally so the function does not depend on `urllib.parse`
    # having been loaded as a side effect of some other import.
    from urllib.parse import urlencode

    # Nothing to add: do not leave a dangling '?' or '&' on the url.
    if not kwargs:
        return url

    query = urlencode(kwargs)

    # Pick the separator: '?' starts a query string, '&' extends one, and
    # nothing is needed when the url already ends with a separator.
    if '?' not in url:
        separator = '?'
    elif url.endswith(('?', '&')):
        separator = ''
    else:
        separator = '&'

    return f'{url}{separator}{query}'

In [22]:
class AmazonScraper:
    """
        Selenium-driven helper that opens an Amazon search for a product name
        and extracts data from the search result pages and from individual
        product pages.

        The parse* / get* methods other than getProductURLList operate on
        whatever page `self.driver` currently has loaded; the caller is
        expected to navigate with `self.driver.get(...)` first.
    """
    base_url = "https://www.amazon.com/s?k="

    def __init__(self, product_name, chrome_path=r"C:\Users\moink\Downloads\chromedriver-win64\chromedriver.exe"):
        """
            Start a Chrome session and load the search page for product_name.

            Args:
                product_name: search phrase; runs of whitespace are replaced by '+'.
                chrome_path: path to the chromedriver executable.
        """
        service = webdriver.chrome.service.Service(executable_path=chrome_path)
        options = webdriver.ChromeOptions()
        options.add_argument("--start-maximized")
        options.add_experimental_option("detach", True)
        self.driver = webdriver.Chrome(service=service, options=options)

        self.search_url = self.base_url + re.sub(r"\s+", "+", product_name)
        self.driver.get(self.search_url)

    def isNullElement(self, element):
        """
            Return True when element has neither visible text nor a
            descendant <img> carrying a src attribute.
        """
        img = element.find_elements(By.XPATH, ".//img[@src]")
        return not (element.text.strip() or img)

    def find_element(self, element, locator, expression, list=True):
        """
            Look up descendants of element.

            Args:
                element: object exposing find_elements(locator, expression).
                locator: locator strategy (e.g. By.XPATH).
                expression: locator expression.
                list: when True return every match, otherwise only the first.
                    (The name shadows the builtin but is kept for callers
                    passing it by keyword.)

            Returns: list of matches, or the first match / None when list is False.
        """
        result = element.find_elements(locator, expression)
        if list:
            return result

        return result[0] if result else None

    def getProductURLList(self):
        """
            Walk every page of the current search and collect product links.

            Returns: list of href strings, in page order.

            Raises: whatever Selenium raises when the result markers, the qid
                input or the pagination element are missing, and ValueError
                when the pagination text is not an integer.
        """
        WebDriverWait(self.driver, 20).until(EC.presence_of_element_located((By.XPATH, "//div[contains(@class, 'a-section a-spacing-none a-spacing-top-small s-title-instructions-style')]")))

        qid = self.driver.find_element(By.XPATH, "//input[@name='qid']").get_attribute("value")
        # The disabled pagination item is read as the number of the last page.
        total_page = int(self.driver.find_element(By.XPATH, "//div[@role='navigation']").find_element(By.XPATH, "//span[contains(@class, 's-pagination-item s-pagination-disabled')]").text)

        product_urls = []

        curr_page = 1

        while True:
            for product in self.driver.find_elements(By.XPATH, "//a[contains(@class, 'a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal')]"):
                href = product.get_attribute("href")
                # Skip anchors without a usable href so callers never try to
                # navigate to None.
                if href:
                    product_urls.append(href)

            curr_page += 1

            if curr_page > total_page:
                break

            next_page_url = construct_url_params(self.search_url, {'page': curr_page, 'qid': qid, 'ref': f'sr_pg_{curr_page}'})
            self.driver.get(next_page_url)

        return product_urls

    def getLeftImage(self):
        """
            Return the src of every image inside the page's imageBlock
            wrappers (possibly an empty list).
        """
        # Uses this instance's driver; the original referenced the notebook
        # global `scraper`, which only worked when exactly one instance
        # existed under that name.
        return [img.get_attribute('src') for img in self.driver.find_elements(By.XPATH, "//div[@id='imageBlock']//div[@class='imgTagWrapper']//img")]

    def getProductNameAndIDFromURL(self):
        """
            Derive the product name and product id from the current URL.

            Handles both '/<name>/dp/<id>/...' and '/dp/<id>/...' paths: the id
            is the segment after 'dp' and the name is the segment before it
            (empty string when there is none), with '-' replaced by ' '.
            When the path has no 'dp' segment the first and third path
            segments are used, as before.

            Returns: (product_name, product_id)

            Raises: IndexError when the path has no 'dp' segment and fewer
                than three segments.
        """
        # Local import: the file only does `import urllib`, which does not
        # guarantee that the `urllib.parse` submodule is loaded.
        from urllib.parse import urlparse

        product_path = urlparse(self.driver.current_url).path.strip('/').split('/')

        # Only accept a 'dp' segment that is followed by another segment.
        if 'dp' in product_path[:-1]:
            dp_index = product_path.index('dp')
            product_id = product_path[dp_index + 1]
            product_name = product_path[dp_index - 1].replace("-", " ") if dp_index > 0 else ""
            return product_name, product_id

        product_name = product_path[0].replace("-", " ")
        product_id = product_path[2]

        return product_name, product_id

    def parseCenterDiv(self):
        """
            Parse the centre column ('centerCol' inside 'ppd') of the current
            product page.

            Returns: dict with 'product_title', 'product_brand',
                'product_brand_url', 'product_about', optionally
                'customer_reviews', plus one entry per row of the product
                overview table.
        """
        product_detail = {}

        centerDiv = self.driver.find_elements(By.XPATH, "//div[@id='ppd']")[0].find_element(By.XPATH, ".//div[@id='centerCol']")

        #get product title
        product_detail['product_title'] = centerDiv.find_element(By.ID, 'productTitle').text

        #get product brand (the byline element is looked up once and reused)
        byline = centerDiv.find_element(By.ID, 'bylineInfo')
        product_detail['product_brand'] = re.sub("^Visit the|^Brand:|store$", "", byline.text, flags=re.IGNORECASE).strip()
        product_detail['product_brand_url'] = byline.get_attribute('href')

        #get customer reviews
        # NOTE(review): an XPath starting with '//' is evaluated against the
        # whole document even when called on centerDiv; kept as-is to
        # preserve which element is matched.
        customer_reviews = centerDiv.find_elements(By.XPATH, "//div[@id='averageCustomerReviews']")
        if customer_reviews:
            product_detail['customer_reviews'] = customer_reviews[0].text.split("\n")[0]

        #get product overview
        product_overview_feature_div = centerDiv.find_elements(By.XPATH, "//div[@id='productOverview_feature_div']")
        if product_overview_feature_div:
            soup = BeautifulSoup(product_overview_feature_div[0].get_attribute('innerHTML'), 'html.parser')
            for row in soup.findAll('tr'):
                cells = row.findChildren('td')
                if not cells:
                    continue

                #below if elif are just for glance icons
                if cells[0].find('table'):
                    cells = cells[0].findAll('td')[-1].findAll('span')
                elif cells[0].find('img') and len(cells) > 1:
                    cells = cells[1].findAll('span')

                # A key/value pair needs two cells; skip malformed rows
                # instead of aborting the whole page with an IndexError.
                if len(cells) < 2:
                    continue

                product_detail[cells[0].text.strip()] = cells[1].text.strip()

        #parse about section
        #remove non ascii characters and collapse continuous whitespace
        product_detail['product_about'] = ""
        product_about = centerDiv.find_elements(By.XPATH, ".//div[@id='featurebullets_feature_div']//ul")

        if product_about:
            product_detail['product_about'] = re.sub(r"\s+", " ", re.sub(r'[^\x00-\x7F]+', "", product_about[0].text))

        return product_detail

    def parseBottomDivs(self):
        """
            Parse the sections below the fold of the current product page:
            product description, detail bullets / product details tables,
            A+ content, A+ brand story and 'btfContent' blocks.

            Returns: (product_details, images) where product_details holds
                'product_short_description' (None when absent),
                'product_long_description', 'brand_story' and every
                configuration key/value that was found, and images is the
                list of img src values collected from the A+ / btf sections.
        """
        productDescription = self.driver.find_elements(By.XPATH, "//div[@id='productDescription']")
        short_description = productDescription[0].text.strip() if productDescription else None

        product_config = {}
        # NOTE: misc is filled in below but is not part of the return value.
        misc = {}
        long_description = ""
        brand_story = ""
        detailBullets = self.driver.find_elements(By.XPATH, "//div[@id='detailBullets_feature_div' and not(@data-feature-name)]")

        if detailBullets:
            for li in detailBullets[0].find_elements(By.TAG_NAME, "li"):
                spans = li.find_elements(By.XPATH, ".//span/span")
                # Skip bullets that do not have a label span and a value span.
                if len(spans) < 2:
                    continue
                product_config[spans[0].text.replace(":", "").strip()] = spans[1].text.strip()
        else:
            productDetails = self.driver.find_elements(By.XPATH, "//div[@id='productDetailsNonPets_feature_div']")
            if productDetails:
                for table in productDetails[0].find_elements(By.TAG_NAME, "table"):
                    # Uses self rather than the notebook global `scraper`.
                    if self.isNullElement(table):
                        continue

                    # Tech-spec tables feed product_config, every other table
                    # feeds misc; get_attribute may return None when the
                    # table has no id.
                    table_id = table.get_attribute('id') or ""
                    target = product_config if "productDetails_techSpec" in table_id else misc

                    for tr in table.find_elements(By.TAG_NAME, "tr"):
                        headers = tr.find_elements(By.TAG_NAME, "th")
                        values = tr.find_elements(By.TAG_NAME, "td")
                        # Rows lacking either a header or a value cell are skipped.
                        if not headers or not values:
                            continue
                        target[headers[0].text.strip()] = values[0].text.strip()

        aplus_feature_div = self.driver.find_elements(By.XPATH, "//div[@id='aplus_feature_div' and div and normalize-space()]")

        images = []
        if aplus_feature_div:
            long_description = aplus_feature_div[0].find_element(By.XPATH, ".//div[@id='aplus']/div").text
            images = [i.get_attribute("src") for i in aplus_feature_div[0].find_elements(By.TAG_NAME, "img")]

        aplus_BS_feature_div = self.driver.find_elements(By.XPATH, "//div[@id='aplusBrandStory_feature_div' and div and normalize-space()]")

        if aplus_BS_feature_div:
            brand_story = aplus_BS_feature_div[0].find_element(By.XPATH, ".//div[@id='aplus']/div").text
            images.extend([i.get_attribute("src") for i in aplus_BS_feature_div[0].find_elements(By.TAG_NAME, "img")])

        btf_contents = self.driver.find_elements(By.XPATH, "//div[contains(@id, 'btfContent') and div and normalize-space()]")
        btf_description = ""
        for btf_content in btf_contents:
            tables = btf_content.find_elements(By.TAG_NAME, "table")
            if tables:
                # Every table row is read as a key/value pair into product_config.
                for table in tables:
                    for tr in table.find_elements(By.TAG_NAME, "tr"):
                        td = tr.find_elements(By.TAG_NAME, "td")
                        if len(td) < 2:
                            continue
                        product_config[td[0].text.strip()] = td[1].text.strip()
            else:
                btf_description += btf_content.text.strip()
            images.extend([i.get_attribute("src") for i in btf_content.find_elements(By.TAG_NAME, "img")])

        # The btf text fills the first description slot that is still empty.
        if not long_description:
            long_description = btf_description
        elif not brand_story:
            brand_story = btf_description
        else:
            misc['extra_information'] = btf_description

        product_details = {
            'product_short_description': re.sub(r'[^\x00-\x7F]+', '', short_description) if short_description else None,
            'product_long_description': re.sub(r'[^\x00-\x7F]+', '', long_description),
            'brand_story': re.sub(r'[^\x00-\x7F]+', '', brand_story)
        }

        product_details.update(product_config)
        return product_details, images

    def quit(self):
        """Close the browser session."""
        self.driver.quit()

In [50]:
def get_all_contexts(text, target_word, context_size=5):
    """
        Collect a window of words around every occurrence of target_word.

        The text is tokenized with word_tokenize and tokens that occur as a
        substring of string.punctuation are dropped (multi-character tokens
        such as "``" therefore survive). Matching is case-insensitive.

        Args:
            text: text to search.
            target_word: word whose occurrences are located.
            context_size: number of tokens kept on each side of a match.

        Returns: list of space-joined context strings, one per occurrence.
    """
    words = []
    for tok in word_tokenize(text):
        if tok not in string.punctuation:
            words.append(tok)

    windows = []
    for position, word in enumerate(words):
        if word.lower() != target_word.lower():
            continue

        # Clamp the window to the bounds of the token list.
        left = max(0, position - context_size)
        right = min(len(words), position + context_size + 1)
        windows.append(' '.join(words[left:right]))

    return windows


In [144]:
def get_app_name(nlp, text):
    """
        Guess the app name mentioned in text.

        Every context window around the word "app" is run through nlp; each
        entity found is upper-cased, stripped of non-alphanumeric characters
        and counted. The contexts are printed as a side effect.

        Args:
            nlp: callable returning a document with an `ents` attribute.
            text: text to analyse.

        Returns: the most frequent normalized entity (first seen wins a tie),
            or None when no entity was found.
    """
    snippets = get_all_contexts(text, "app")

    print(snippets)

    tally = {}
    for snippet in snippets:
        for entity in nlp(snippet).ents:
            key = re.sub("[^a-zA-Z0-9]", "", entity.text.upper())
            tally[key] = tally.get(key, 0) + 1

    if not tally:
        return None

    return max(tally, key=tally.get)

In [147]:
# Download the punkt tokenizer with nltk.download('punkt') and add the
# directory that contains it to NLTK's data search path below.
nltk.data.path.append(r"C:\Users\moink\Downloads\white_label\tokenizers")

# Load the spaCy model that is passed to get_app_name as `nlp`.
# Alternative model, currently disabled:
# nlp = spacy.load(r"C:\Users\moink\Downloads\white_label\model-best")
nlp = spacy.load(r"C:\Users\moink\Downloads\white_label\case_sensitive_ner")



In [149]:
# Start a Chrome session and open the Amazon search page for this phrase.
scraper = AmazonScraper("smart plug wifi")

In [7]:
# Collect the product links from every page of the search results.
product_urls = scraper.getProductURLList()

In [8]:
# Number of links collected (duplicates are not removed at this stage).
len(product_urls)

433

In [151]:
# Scraped products keyed by product id; the scraping loop skips ids already present.
product_details= {}

In [152]:
# Visit every collected link, scrape the product page and store one record
# per product id in product_details.
for product_url in product_urls:
    print(product_url)
    scraper.driver.get(product_url)
    product_name, product_id = scraper.getProductNameAndIDFromURL()

    # The same product can appear under several links; scrape it only once.
    if product_id in product_details:
        continue

    # Some pages expose no main image; store None instead of failing with an
    # IndexError and aborting the whole run.
    left_images = scraper.getLeftImage()
    main_image = left_images[0] if left_images else None

    center_div = scraper.parseCenterDiv()
    bottom, _ = scraper.parseBottomDivs()

    # Try each description in turn and keep the first app name found.
    description_list = [center_div['product_about'] , bottom['product_long_description'] , bottom['brand_story'] , bottom['product_short_description']]

    app_name = None
    for description in description_list:
        if not description:
            continue
        app_name = get_app_name(nlp, description.replace("\n", " "))
        if app_name:
            break

    print(app_name)

    # Key order is kept stable because it determines the column order of the
    # DataFrame built from these records.
    product_details[product_id] = {
        'id': product_id,
        'name': product_name,
        'long_description': bottom['product_long_description'] or bottom['brand_story'] or bottom['product_short_description'] or center_div['product_about'],
        'brand': center_div['product_brand'],
        'app_name': app_name,
        'image_url': main_image,
        'url': product_url,
    }

https://www.amazon.com/sspa/click?ie=UTF8&spc=MToyNjE0NTkyMzkzNTk2NzA1OjE3MDE5MDk4NDY6c3BfYXRmOjIwMDA2Njg0OTU1NTg2MTo6MDo6&url=%2FAmazon-smart-plug-works-with-Alexa%2Fdp%2FB089DR29T6%2Fref%3Dsr_1_1_ffob_sspa%3Fkeywords%3Dsmart%2Bplug%2Bwifi%26qid%3D1701909846%26sr%3D8-1-spons%26sp_csd%3Dd2lkZ2V0TmFtZT1zcF9hdGY%26psc%3D1
['useplug in open the Alexa app and get started in minutes', 'and schedules through the Alexa app']
ALEXA
https://www.amazon.com/sspa/click?ie=UTF8&spc=MToyNjE0NTkyMzkzNTk2NzA1OjE3MDE5MDk4NDY6c3BfYXRmOjMwMDA5MTQ3OTk5NDEwMjo6MDo6&url=%2FBeysen-YX-WX01C-Smart-Plug%2Fdp%2FB09ZL14T3Y%2Fref%3Dsr_1_2_sspa%3Fkeywords%3Dsmart%2Bplug%2Bwifi%26qid%3D1701909846%26sr%3D8-2-spons%26sp_csd%3Dd2lkZ2V0TmFtZT1zcF9hdGY%26psc%3D1
['Alexa can control the plug APP Control Anywhere with the Alexa', 'Control Anywhere with the Alexa APP on your mobile phone you']
ALEXA
https://www.amazon.com/sspa/click?ie=UTF8&spc=MToyNjE0NTkyMzkzNTk2NzA1OjE3MDE5MDk4NDY6c3BfYXRmOjIwMDExMzk2NzE0NzY5ODo6MDo6&url

['Alexa can control the plug APP Control Anywhere with the Alexa', 'Control Anywhere with the Alexa APP on your mobile phone you']
ALEXA
https://www.amazon.com/Outlet-Compatible-Smartthings-Control-Function/dp/B0B62LPR5Z/ref=sr_1_10?keywords=smart+plug+wifi&qid=1701909846&sr=8-10
["Just open the Smart Life App and your phone 's Bluetooth", 'plug with the Smart Life App first Warm Tips Our smart']
SMARTLIFE
https://www.amazon.com/sspa/click?ie=UTF8&spc=MToyNjE0NTkyMzkzNTk2NzA1OjE3MDE5MDk4NDY6c3BfbXRmOjIwMDA3MDE3ODkxMDE1MTo6MDo6&url=%2FTreatlife-Smart-Light-Switch-Assistant%2Fdp%2FB07R4MFWCQ%2Fref%3Dsr_1_11_sspa%3Fkeywords%3Dsmart%2Bplug%2Bwifi%26qid%3D1701909846%26sr%3D8-11-spons%26sp_csd%3Dd2lkZ2V0TmFtZT1zcF9tdGY%26psc%3D1%26smid%3DA3U2GPXV9J2W2G
['anywhere using the Smart Life app on your smartphone Whether you']
SMARTLIFE
https://www.amazon.com/sspa/click?ie=UTF8&spc=MToyNjE0NTkyMzkzNTk2NzA1OjE3MDE5MDk4NDY6c3BfbXRmOjIwMDA3MDE3NzkxMDE1MTo6MDo6&url=%2FRequired-Treatlife-Compatible-Assi

['SCHEDULING AUTOMATION The KMC Smart app allows you to automate your', 'Simply download the KMC Smart app and follow the in-app instructions']
KMC
https://www.amazon.com/EIGHTREE-Compatible-SmartThings-Control-Function/dp/B0B2PFFDKF/ref=sr_1_28?keywords=smart+plug+wifi&qid=1701909846&sr=8-28
['APP Remote Control Easily control your', 'any place through Smart Life APP No more worrying about forgetting']
SMARTLIFE
https://www.amazon.com/HBN-Grounded-Smartlife-Compatible-Assistant/dp/B07WNKQ28B/ref=sr_1_29?keywords=smart+plug+wifi&qid=1701909846&sr=8-29
['Smart Smart Life Tuya Smart App you could turn on off']
SMARTLIFE
https://www.amazon.com/HBN-Individual-Grounded-Compatible-Assistant/dp/B07XZJ412P/ref=sr_1_30?keywords=smart+plug+wifi&qid=1701909846&sr=8-30
https://www.amazon.com/Compatible-Assistant-Required-Schedule-Function/dp/B07SVYJZ5C/ref=sr_1_31?keywords=smart+plug+wifi&qid=1701909846&sr=8-31
['Share with your family FREE APP CONTROL Connect the power plug', "and Download the Fr

["Control After downloading the free APP `` Smart Life Tuya ''"]
SMARTLIFE
https://www.amazon.com/Compatible-SmartThings-Smartphone-Weatherproof-Certified/dp/B09JLLVKZ2/ref=sr_1_58?keywords=smart+plug+wifi&qid=1701909846&sr=8-58
['and download the Smart Life app Its as easy as one', 'using the free Smart Life App on your phone and enjoy']
SMARTLIFE
https://www.amazon.com/Socket-Control-Compatible-Assistant-SmartThing/dp/B0B7LPN5CB/ref=sr_1_59?keywords=smart+plug+wifi&qid=1701909846&sr=8-59
['anytime anywhere with the free app You can check the status', 'Use Download the Smart life APP connect to 2.4GHz Wi-Fi Add']
SMARTLIFE
https://www.amazon.com/EX-1-Schedulete-Controller-Certified-Connection/dp/B0B25DYDV9/ref=sr_1_60?keywords=smart+plug+wifi&qid=1701909846&sr=8-60
['up those smart plugs No App required for the pairing process', 'voice and in the Alexa app remote control group control schedule']
ALEXA
https://aax-us-iad.amazon.com/x/c/RLAqFmFBifl2f8bABLrdtiEAAAGMQbtZQwEAAAH2AQBvbm9fdH

['Now IFTTT is supporting too APP Remote ControlRemotely control ON/OFF your', 'home appliances and devices via APP eWeLink on smart phone or', "Home '' on Google Home app start to voice control your"]
EWELINK
https://www.amazon.com/sspa/click?ie=UTF8&spc=MTo3NjAwNjMzNDU1NTY3NzA1OjE3MDE5MDk4NjE6c3Bfc2VhcmNoX3RoZW1hdGljOjMwMDA3NjE2MTg0ODEwMjo6NDo6&url=%2FELEGRP-Outdoor-Waterproof-Individually-Assistant%2Fdp%2FB0BFVNRS75%2Fref%3Dsxin_14_pa_sp_search_thematic_sspa%3Fcontent-id%3Damzn1.sym.d6ad0a4b-2eda-440d-98a3-7289fc1c492e%253Aamzn1.sym.d6ad0a4b-2eda-440d-98a3-7289fc1c492e%26cv_ct_cx%3Dsmart%2Bplug%2Bwifi%26keywords%3Dsmart%2Bplug%2Bwifi%26pd_rd_i%3DB0BFVNRS75%26pd_rd_r%3D9142f8c7-de1a-4be0-99ed-7a246a197498%26pd_rd_w%3DbAtp0%26pd_rd_wg%3DOhBky%26pf_rd_p%3Dd6ad0a4b-2eda-440d-98a3-7289fc1c492e%26pf_rd_r%3DTJDG9YVN13CVB0V5H498%26qid%3D1701909861%26sbo%3DRZvfv%252F%252FHxDF%252BO5021pAnSA%253D%253D%26sr%3D1-53-f853d353-bf33-45e7-b5c2-2cb2b31abc9b-spons%26sp_csd%3Dd2lkZ2V0TmFtZT1zcF9zZWFyY2

['fully protected and running well APP Remote Control from Anywhere Download', 'from Anywhere Download BN-LINK Smart app and connect it according to']
BNLINK
https://www.amazon.com/sspa/click?ie=UTF8&spc=MTo3MjM1MDcyMTQ5ODI1NzA1OjE3MDE5MDk4NjE6c3BfbXRmOjMwMDAzNzg1MDM1MDUwMjo6MDo6&url=%2FRetractable-Extension-Protection-Outlets-Workshop%2Fdp%2FB0C9HK2J62%2Fref%3Dsr_1_76_sspa%3Fkeywords%3Dsmart%2Bplug%2Bwifi%26qid%3D1701909861%26sr%3D8-76-spons%26sp_csd%3Dd2lkZ2V0TmFtZT1zcF9tdGY%26psc%3D1
[]
[]
None
https://www.amazon.com/Otcboimo-Wireless-Ewelink-Intelligent-Appliances/dp/B0CGDCB8TS/ref=sr_1_77?keywords=smart+plug+wifi&qid=1701909861&sr=8-77
["family 's high-power electrical appliances APP REMOTE CONTROL Control your smart", 'your smart switch on Ewelink App anytime any where Always know', 'know the computer status on App DEVICE SHARING You Ccan share', 'control home appliances through Ewelink app It works from anywhere you']
EWELINK
https://www.amazon.com/Support-Control-Reliable-Conne

['Schedules Download the Energizer Connect App with the 2.4 GHz connection']
['use the downloadable Energizer Connect App to create custom schedules based']
None
https://www.amazon.com/WeMo-Switch-Smart-Works-Alexa/dp/B00BB2MMNE/ref=sr_1_103?keywords=smart+plug+wifi&qid=1701909861&sr=8-103
['Wemo Switch download the free app and start controlling your lights']
WEMO
https://www.amazon.com/MODMA-Low-Profile-Required-Certified-Compatible/dp/B0CNZQNB45/ref=sr_1_104?keywords=smart+plug+wifi&qid=1701909861&sr=8-104
['Schedule devices effortlessly via our app leveraging weather time or events']
['a hub The free Smart App enables you to automate your']
None
https://www.amazon.com/MOES-Matter-Monitoring-Assistant-Compact/dp/B0CF887Z9P/ref=sr_1_105?keywords=smart+plug+wifi&qid=1701909861&sr=8-105
['with a simple voice command APP REMOTE OUTLET PLUGWith our smart', 'with ease Simply use the APP to connect to your wifi', 'lives TIMER/ENERGY MONITORING/CHILD SAFETY SYSTEMSThe app of smart plugs tha

https://www.amazon.com/sspa/click?ie=UTF8&spc=MToxOTQ2OTkyMDYzMTg5NzA1OjE3MDE5MDk4NjU6c3Bfc2VhcmNoX3RoZW1hdGljOjMwMDAyNTExMjQwNDAwMjo6NDo6&url=%2FTREATLIFE-Individual-Resistance-Compatible-SmartThings%2Fdp%2FB0CCCJFCSY%2Fref%3Dsxin_14_pa_sp_search_thematic_sspa%3Fcontent-id%3Damzn1.sym.d6ad0a4b-2eda-440d-98a3-7289fc1c492e%253Aamzn1.sym.d6ad0a4b-2eda-440d-98a3-7289fc1c492e%26cv_ct_cx%3Dsmart%2Bplug%2Bwifi%26keywords%3Dsmart%2Bplug%2Bwifi%26pd_rd_i%3DB0CCCJFCSY%26pd_rd_r%3Df5fd42a5-68d8-4faf-8ce5-1899eeb315c1%26pd_rd_w%3DCacoz%26pd_rd_wg%3DzceJq%26pf_rd_p%3Dd6ad0a4b-2eda-440d-98a3-7289fc1c492e%26pf_rd_r%3DS3PYQBAQ1B7VG0FRP18R%26qid%3D1701909865%26sbo%3DRZvfv%252F%252FHxDF%252BO5021pAnSA%253D%253D%26sr%3D1-101-f853d353-bf33-45e7-b5c2-2cb2b31abc9b-spons%26sp_csd%3Dd2lkZ2V0TmFtZT1zcF9zZWFyY2hfdGhlbWF0aWM%26psc%3D1
['needed Plug in open Treatlife app follow simple instructions and enjoy', 'a single tap Voice Control App Remote Control Add voice control', 'your smartphone using the Treatlife 

[]
[]
[]
None
https://www.amazon.com/sspa/click?ie=UTF8&spc=MTo0NzgyNTE4NDA4MTA0NzA1OjE3MDE5MDk4NjU6c3BfbXRmOjIwMDA3MzE0NDQyMDE1MTo6MDo6&url=%2FTreatlife-Assistant-Required-Waterproof-Dimmable%2Fdp%2FB086V3S2Q7%2Fref%3Dsr_1_124_sspa%3Fkeywords%3Dsmart%2Bplug%2Bwifi%26qid%3D1701909865%26sr%3D8-124-spons%26sp_csd%3Dd2lkZ2V0TmFtZT1zcF9tdGY%26psc%3D1
['know more No Hub Required APP Control Enjoy the convenience of', 'phone using the Smart Life/TreatLife app No cumbersome hub is required']
SMARTLIFE
https://www.amazon.com/Classic-Outlet-Required-Certified-HS105/dp/B01K1JVZOE/ref=sr_1_125?keywords=smart+plug+wifi&qid=1701909865&sr=8-125
['your smartphone using the Kasa app Compatible w/ Android iOS Voice']
KASA
https://www.amazon.com/Peiiwdc-Socket-Control-Intelligent-Monitor/dp/B0CLJFH6G4/ref=sr_1_126?keywords=smart+plug+wifi&qid=1701909865&sr=8-126
[]
['socket eu uk us with app mini Features brand new and']
EU
https://www.amazon.com/Sparkleiot-Outlet-Control-Assistant-Required/dp/B09K5DV17

['ways via the free Lutron app with your voice or at']
LUTRON
https://www.amazon.com/lanema-Intelligent-Control-Function-Wireless/dp/B0CLCSHL9G/ref=sr_1_153?keywords=smart+plug+wifi&qid=1701909865&sr=8-153
[]
[]
None
https://www.amazon.com/Compatible-SmartThings-Lighted-Christmas-Crystal/dp/B0CN3RQGGD/ref=sr_1_154?keywords=smart+plug+wifi&qid=1701909865&sr=8-154
['on Bluetooth download the CozyLife app and log in When the', 'flashing mode open the CozyLife app to pair and connect the']
COZYLIFE
https://www.amazon.com/Xiaoxin-Socket-Extender-Assistant-Required/dp/B0BWJ4L918/ref=sr_1_155?keywords=smart+plug+wifi&qid=1701909865&sr=8-155
['smartphone anytime via the Smart app TIMER AND SCHEDULE SETTING- Use']
[]
None
https://www.amazon.com/Compatible-SmartThings-Lighted-Christmas-Snowman/dp/B0CN3RCNVL/ref=sr_1_156?keywords=smart+plug+wifi&qid=1701909865&sr=8-156
['on Bluetooth download the CozyLife app and log in When the', 'flashing mode open the CozyLife app to pair and connect the']
COZ

['network and the free SmartLife app on iOS or Android No', 'download the free Smart Life app on your cellphone NoteIt only']
SMARTLIFE
https://www.amazon.com/sspa/click?ie=UTF8&spc=MToyMTI5MDQxMDkwNjA0NzA1OjE3MDE5MDk4Njg6c3Bfc2VhcmNoX3RoZW1hdGljOjIwMDA3MDAxNjEwODc5ODo6NDo6&url=%2FSmarter-Living-Reliable-Supports-Required%2Fdp%2FB09LBSV1QM%2Fref%3Dsxin_14_pa_sp_search_thematic_sspa%3Fcontent-id%3Damzn1.sym.d6ad0a4b-2eda-440d-98a3-7289fc1c492e%253Aamzn1.sym.d6ad0a4b-2eda-440d-98a3-7289fc1c492e%26cv_ct_cx%3Dsmart%2Bplug%2Bwifi%26keywords%3Dsmart%2Bplug%2Bwifi%26pd_rd_i%3DB09LBSV1QM%26pd_rd_r%3D1e808beb-c42a-4e55-9d8a-f34fc6bb06ec%26pd_rd_w%3DAAMuz%26pd_rd_wg%3DAgS8D%26pf_rd_p%3Dd6ad0a4b-2eda-440d-98a3-7289fc1c492e%26pf_rd_r%3D2CKYQBRH0Q42YSYYQJQF%26qid%3D1701909868%26sbo%3DRZvfv%252F%252FHxDF%252BO5021pAnSA%253D%253D%26sr%3D1-149-f853d353-bf33-45e7-b5c2-2cb2b31abc9b-spons%26sp_csd%3Dd2lkZ2V0TmFtZT1zcF9zZWFyY2hfdGhlbWF0aWM%26psc%3D1
['via Smart Life and Tuya app available on Android and I

['that you receive accurate Email App alerts on wireless security camera']
['with PIR Sensor NVR Buzzer App Alert Push Email Notification Remote', "Remote Viewing Android/IOS Smartphone/Pad/PC/Laptop/Notebooks Phone App `` EseeCloud '' NVR Recorder", 'PIR human detection NVR buzzer app alert push email notification remote', 'Siren 8X Digital Zoom on App or NVR Tonton Wireless NVR', '1TB Digital Zoom 8X on App or NVR 8X on App', 'App or NVR 8X on App or NVR 8X on App', 'App or NVR 8X on App or NVR 8X on App', 'App or NVR 8X on App or NVR 8X on App', 'App or NVR 8X on App or NVR NO NO Internet']
8X
https://www.amazon.com/sspa/click?ie=UTF8&spc=MTo3MzMxNjQxNzU2Nzc2NzA1OjE3MDE5MDk4Njg6c3BfbXRmOjMwMDAwODIyNzEwNTEwMjo6MDo6&url=%2FSwitch-Assistant-Control-Required-Neutral%2Fdp%2FB0B1HQJ56C%2Fref%3Dsr_1_172_sspa%3Fkeywords%3Dsmart%2Bplug%2Bwifi%26qid%3D1701909868%26sr%3D8-172-spons%26sp_csd%3Dd2lkZ2V0TmFtZT1zcF9tdGY%26psc%3D1
["time through your smartphone 's app TIME SCHEDULE Install the wifi

['oversee household devices using the app for our WiFi Outlet Plug']
['WiFi Smart Plug features both APP and voice control functions that']
None
https://www.amazon.com/Minoston-Waterproof-Compatible-Assistant-Required/dp/B0CBTVP8WT/ref=sr_1_199?keywords=smart+plug+wifi&qid=1701909868&sr=8-199
['need is the free SmartLife app for iOS or Android and', 'network download the Smart Life app for your smartphone and you']
SMARTLIFE
https://www.amazon.com/Smart-Proof-Outlet-Sockets-Switch/dp/B0C7GY5V54/ref=sr_1_200?keywords=smart+plug+wifi&qid=1701909868&sr=8-200
['device to WiFi open the APP you can control the power', '15A 1875W Multiple Function Support APP power on/off control time and']
[]
['Wireless Type WiFi 2.4GHz b/g/n App Name for Tuya Smart for']
None
https://www.amazon.com/GLP1000A-SET2-Required-Control-Assistant-Compatible/dp/B0BWBZCB16/ref=sr_1_201?keywords=smart+plug+wifi&qid=1701909868&sr=8-201
['to access control through the App and share schedules and routines']
['and downloa

https://www.amazon.com/sspa/click?ie=UTF8&spc=MTo3MTkxMzI5MDUzMTM5NzA1OjE3MDE5MDk4NzE6c3Bfc2VhcmNoX3RoZW1hdGljOjIwMDAzMTA1ODY5OTQ2MTo6Mzo6&url=%2FHBN-Outdoor-Required-Compatible-Assistant%2Fdp%2FB08D4RQR1T%2Fref%3Dsxin_14_pa_sp_search_thematic_sspa%3Fcontent-id%3Damzn1.sym.d6ad0a4b-2eda-440d-98a3-7289fc1c492e%253Aamzn1.sym.d6ad0a4b-2eda-440d-98a3-7289fc1c492e%26cv_ct_cx%3Dsmart%2Bplug%2Bwifi%26keywords%3Dsmart%2Bplug%2Bwifi%26pd_rd_i%3DB08D4RQR1T%26pd_rd_r%3D20fd5dad-87b2-4377-a74d-f3d1b6c4be3e%26pd_rd_w%3DTFP6V%26pd_rd_wg%3DaUpgy%26pf_rd_p%3Dd6ad0a4b-2eda-440d-98a3-7289fc1c492e%26pf_rd_r%3DNXK6YRVHB9CSM6AM667V%26qid%3D1701909871%26sbo%3DRZvfv%252F%252FHxDF%252BO5021pAnSA%253D%253D%26sr%3D1-196-f853d353-bf33-45e7-b5c2-2cb2b31abc9b-spons%26sp_csd%3Dd2lkZ2V0TmFtZT1zcF9zZWFyY2hfdGhlbWF0aWM%26psc%3D1
['Smart Smart Life Tuya Smart App compatible with Amazon Alexa Google']
SMARTLIFE
https://www.amazon.com/sspa/click?ie=UTF8&spc=MTo3MTkxMzI5MDUzMTM5NzA1OjE3MDE5MDk4NzE6c3Bfc2VhcmNoX3RoZW1hdGlj

['socket and connect to the app and you can use it']
['the instructions to download the APP and set it up then']
None
https://www.amazon.com/sspa/click?ie=UTF8&spc=MToyMjE3Mzg4MTE1OTUwNzA1OjE3MDE5MDk4NzE6c3BfbXRmOjIwMDAxNjA1MTY1OTg0MTo6MDo6&url=%2FCentury-Wireless-Electrical-Christmas-Appliance%2Fdp%2FB01NASZ0UM%2Fref%3Dsr_1_217_sspa%3Fkeywords%3Dsmart%2Bplug%2Bwifi%26qid%3D1701909871%26sr%3D8-217-spons%26sp_csd%3Dd2lkZ2V0TmFtZT1zcF9tdGY%26psc%3D1
[]
['Physical Remote 1 Physical Remote APP Remote/Voice Control APP Remote/Voice Control', 'Physical Remote APP Remote/Voice Control APP Remote/Voice Control No.of Outlets Included']
[]
None
https://www.amazon.com/sspa/click?ie=UTF8&spc=MToyMjE3Mzg4MTE1OTUwNzA1OjE3MDE5MDk4NzE6c3BfbXRmOjMwMDAyMTU2MTczNTkwMjo6MDo6&url=%2FLinkind-SmartThings-Automation-Control-Schedule%2Fdp%2FB0C371HB66%2Fref%3Dsr_1_218_sspa%3Fkeywords%3Dsmart%2Bplug%2Bwifi%26qid%3D1701909871%26sr%3D8-218-spons%26sp_csd%3Dd2lkZ2V0TmFtZT1zcF9tdGY%26psc%3D1
https://www.amazon.com/

['are after downloading the FREE APP Smart Life as long as']
SMARTLIFE
https://www.amazon.com/Outlet-Visible-Dustproof-Smart-Balcony/dp/B0CG4W85GF/ref=sr_1_239?keywords=smart+plug+wifi&qid=1701909871&sr=8-239
['Real Time APP Monitoring With its APP real', 'Time APP Monitoring With its APP real time monitoring system you']
['all times 5 Real Time APP Monitoring With its APP real', 'Time APP Monitoring With its APP real time monitoring system you', '250V Rated Frequency 50Hz 60HZ APP for Tuya smart life Wifi', 'How to Use Download the APP then open the login connection']
SMARTLIFE
https://www.amazon.com/Socket-Outlet-Google-Remote-Control/dp/B0BWN14WQX/ref=sr_1_240?keywords=smart+plug+wifi&qid=1701909871&sr=8-240
['Must Have smart plugs with app for relaxed smart life Hand-Free', 'control your connected home devices APP Remote Control From Anywhere You', 'Alexa smart plug via the APP directly when you are away']
ALEXA
https://www.amazon.com/eco4life-Extenders-Charging-Required-Certificat

https://www.amazon.com/sspa/click?ie=UTF8&spc=MToyMDc0NjM2NzEwNjIzNzA1OjE3MDE5MDk4NzU6c3Bfc2VhcmNoX3RoZW1hdGljOjMwMDA3NjE2MTg0NzkwMjo6MDo6&url=%2FELEGRP-Waterproof-Assistant-Required-Certified%2Fdp%2FB0BFVW2YMC%2Fref%3Dsxin_14_pa_sp_search_thematic_sspa%3Fcontent-id%3Damzn1.sym.d6ad0a4b-2eda-440d-98a3-7289fc1c492e%253Aamzn1.sym.d6ad0a4b-2eda-440d-98a3-7289fc1c492e%26cv_ct_cx%3Dsmart%2Bplug%2Bwifi%26keywords%3Dsmart%2Bplug%2Bwifi%26pd_rd_i%3DB0BFVW2YMC%26pd_rd_r%3D97c6c87d-63dd-44d5-9038-0413371d4c7e%26pd_rd_w%3DKwpf7%26pd_rd_wg%3DABT2y%26pf_rd_p%3Dd6ad0a4b-2eda-440d-98a3-7289fc1c492e%26pf_rd_r%3DBH0REAB5F1QFSFANC07Y%26qid%3D1701909874%26sbo%3DRZvfv%252F%252FHxDF%252BO5021pAnSA%253D%253D%26sr%3D1-241-f853d353-bf33-45e7-b5c2-2cb2b31abc9b-spons%26sp_csd%3Dd2lkZ2V0TmFtZT1zcF9zZWFyY2hfdGhlbWF0aWM%26psc%3D1
["your family 's safety Smart APP Voice ControlWorks with Amazon Alexa", 'your smartphone using the ELEGRP app whether you are at home', 'to any desired level with APP Alexa and Google As

['fountain pumps and more Smart APP Voice ControlWorks with Amazon Alexa', 'your smartphone using the ELEGRP app whether you are at home']
ALEXA
https://www.amazon.com/sspa/click?ie=UTF8&spc=MTo2OTA1OTI3MDg4MjcwNzA1OjE3MDE5MDk4NzQ6c3BfbXRmOjIwMDE2NTkyODQ5NzY5ODo6MDo6&url=%2FSwitch-Neutral-Required-Electrical-Switches%2Fdp%2FB09YVC44D4%2Fref%3Dsr_1_260_sspa%3Fkeywords%3Dsmart%2Bplug%2Bwifi%26qid%3D1701909874%26sr%3D8-260-spons%26sp_csd%3Dd2lkZ2V0TmFtZT1zcF9tdGY%26psc%3D1
['you use it with ease App Remote Control Voice ControlSmart light', 'smart phone via Smart Life/Tuya App wherever you are Take full', '1 hour etc with the app share control with family and', 'wiring process and set the App you can share the account']
SMARTLIFE
https://www.amazon.com/sspa/click?ie=UTF8&spc=MTo2OTA1OTI3MDg4MjcwNzA1OjE3MDE5MDk4NzQ6c3BfbXRmOjMwMDAzMTI5MDIwOTgwMjo6MDo6&url=%2FYitouniu-Momentary-Self-Locking-Bluetooth-Compatible%2Fdp%2FB0CBKXTR32%2Fref%3Dsr_1_261_sspa%3Fkeywords%3Dsmart%2Bplug%2Bwifi%26qid%3

['with remote control Real Time APP Monitoring With its APP real', 'Time APP Monitoring With its APP real time monitoring system you']
['all times 5 Real Time APP Monitoring With its APP real', 'Time APP Monitoring With its APP real time monitoring system you', '250V Rated Frequency 50Hz 60HZ APP for Tuya smart life Wifi', 'How to Use Download the APP then open the login connection']
SMARTLIFE
https://www.amazon.com/Socket-Waterproof-Clasp-Design-Bathroom/dp/B0CBLL3JFS/ref=sr_1_284?keywords=smart+plug+wifi&qid=1701909874&sr=8-284
['at all times Real Time APP Monitoring With its APP real', 'Time APP Monitoring With its APP real time monitoring system you']
['all times 5 Real Time APP Monitoring With its APP real', 'Time APP Monitoring With its APP real time monitoring system you', '250V Rated Frequency 50Hz 60HZ APP for Tuya smart life Wifi', 'How to Use Download the APP then open the login connection']
SMARTLIFE
https://www.amazon.com/Shutter-Visible-Dustproof-100-240V-Bathroom/dp/B0CB

[]
[]
[]
None
https://www.amazon.com/sspa/click?ie=UTF8&spc=MToyNjQ2MDM2MTkwODgzNzA1OjE3MDE5MDk4Nzg6c3BfYXRmX25leHQ6MzAwMDI0MjIwMjYzNTAyOjowOjo&url=%2FShelly-Bluetooth-Automation-Compatibility-Required%2Fdp%2FB0CC61K3T7%2Fref%3Dsr_1_292_sspa%3Fkeywords%3Dsmart%2Bplug%2Bwifi%26qid%3D1701909878%26sr%3D8-292-spons%26sp_csd%3Dd2lkZ2V0TmFtZT1zcF9hdGZfbmV4dA%26psc%3D1
['device warranty SHELLY SMART CONTROL APP will help you control your']
['a voice assistant Use Shelly App to set automated scenes based']
SHELLY
https://www.amazon.com/VTOSEN-Curtain-Wireless-Control-Shutter/dp/B0CCL7DDVD/ref=sr_1_293?keywords=smart+plug+wifi&qid=1701909878&sr=8-293
https://www.amazon.com/Fastener-Design-Remote-Control-Bathroom/dp/B0CBLHMGLP/ref=sr_1_294?keywords=smart+plug+wifi&qid=1701909878&sr=8-294
https://www.amazon.com/Socket-Outlet-Google-Remote-Control/dp/B0BWN22K9B/ref=sr_1_295?keywords=smart+plug+wifi&qid=1701909878&sr=8-295
https://www.amazon.com/sspa/click?ie=UTF8&spc=MTo2MzIzNjY1MDE0OTI1NzA1OjE3MD

[]
['sound changes With the mobile app WiFi control allows users to', 'real-time device status to the App 2 Support the timing of', 'New Color Black White optional APP Smart Life Control method APP', 'APP Smart Life Control method APP control touch switch Power supply']
SMARTLIFE
https://www.amazon.com/sspa/click?ie=UTF8&spc=MToyNjQ2MDM2MTkwODgzNzA1OjE3MDE5MDk4Nzg6c3BfbXRmOjMwMDA1NDU2MTIxNzAwMjo6MDo6&url=%2FVIPMOON-Compatible-Speakers-Appointment-Function%2Fdp%2FB0CB635K3V%2Fref%3Dsr_1_305_sspa%3Fkeywords%3Dsmart%2Bplug%2Bwifi%26qid%3D1701909878%26sr%3D8-305-spons%26sp_csd%3Dd2lkZ2V0TmFtZT1zcF9tdGY%26psc%3D1
["the `` Smart Life '' app you can turn it on", 'on and off via the app You can also share control', 'also share control via the app with family and friends to']
SMARTLIFE
https://www.amazon.com/sspa/click?ie=UTF8&spc=MToyNjQ2MDM2MTkwODgzNzA1OjE3MDE5MDk4Nzg6c3BfbXRmOjIwMDE3NDA4MzY4OTM5ODo6MDo6&url=%2FYitouniu-AC85-250V-Temperature-Consumption-Monitoring%2Fdp%2FB0C2DBCP99%2Fref%3Dsr

In [164]:
# Build one DataFrame row per scraped product (one dict per product id).
df = pd.DataFrame(product_details.values())

# Write the dataset to disk as UTF-8 CSV without the DataFrame index column.
df.to_csv(r"C:\Users\moink\Downloads\network-security-white-labelling\amazon_products_dataset.csv", index=False, encoding='utf-8')

In [153]:
class GoogleDriveWrapper:
    """
        Convenience wrapper around a PyDrive2 ``GoogleDrive`` client.

        Offers lookup of files/folders by title, folder creation and file
        upload (from a local path or from a remote url).
    """

    # MIME type used by Drive to mark an item as a folder
    _FOLDER_MIME_TYPE = 'application/vnd.google-apps.folder'

    # longest sleep (in seconds) between two attempts after an HTTP 429
    _MAX_BACKOFF_SECONDS = 40

    def __init__(self, credentials_file="creds.txt"):
        """
            Authenticate against Google Drive and build the drive client.

            credentials_file: file the credentials are loaded from and saved back to
        """
        auth = GoogleAuth()
        # Try to load saved client credentials
        auth.LoadCredentialsFile(credentials_file)
        if auth.credentials is None:
            # Authenticate if they're not there
            auth.LocalWebserverAuth()
        elif auth.access_token_expired:
            # Refresh them if expired
            auth.Refresh()
        else:
            # Initialize the saved creds
            auth.Authorize()
        # Save the current credentials to a file
        auth.SaveCredentialsFile(credentials_file)

        self.drive = GoogleDrive(auth)

    @staticmethod
    def _escape_query_value(value):
        """
            Escape backslashes and single quotes so the value can be placed
            inside a single-quoted literal of a Drive search query.
        """
        return str(value).replace("\\", "\\\\").replace("'", "\\'")

    @classmethod
    def _build_query(cls, name, is_folder, parent_id=None):
        """
            Build the search query for an item titled `name`.

            is_folder: True to match folders only, False to match non-folders only
            parent_id: when given, restrict the search to that parent folder
        """
        operator = '=' if is_folder else '!='
        query = (
            f"mimeType {operator} '{cls._FOLDER_MIME_TYPE}'"
            f" and title='{cls._escape_query_value(name)}'"
        )
        if parent_id:
            query += f" and '{cls._escape_query_value(parent_id)}' in parents"
        return query

    @classmethod
    def _backoff_delay(cls, retry_count):
        """
            Exponential backoff (5s, 10s, 20s, ...) capped at _MAX_BACKOFF_SECONDS.
        """
        return min(5 * 2 ** retry_count, cls._MAX_BACKOFF_SECONDS)

    def _first_match(self, query, url=False):
        """
            Run the query and return the id (or the alternateLink when url is
            True) of the first result, None when nothing matches.
        """
        matches = self.drive.ListFile({'q': query}).GetList()
        if not matches:
            return None
        return matches[0]['alternateLink'] if url else matches[0]['id']

    def _download(self, file_url, max_retries=100):
        """
            Download file_url and return its content as bytes.

            Retries on IncompleteRead and on HTTP 429 (with backoff) until
            max_retries is exceeded; any other error stops the download.
            Best effort: when no attempt completes, the longest partial body
            received is returned (b"" when nothing was received at all).
        """
        # imported here so the method does not depend on the notebook's import cell
        import http.client
        import random
        import urllib.error
        import urllib.request

        # NOTE: user_agents is expected to be a module-level sequence of
        # User-Agent strings defined elsewhere in this file
        headers = {'User-Agent': random.choice(user_agents)}
        req = urllib.request.Request(file_url, headers=headers)

        best_partial = b""
        retry_count = 0

        while retry_count <= max_retries:
            try:
                # the context manager closes the connection once the body is read
                with urllib.request.urlopen(req) as response:
                    return response.read()
            except http.client.IncompleteRead as e:
                # every retry restarts the download from byte 0, so partial
                # bodies must not be concatenated; remember the longest one
                if len(e.partial) > len(best_partial):
                    best_partial = e.partial
                retry_count += 1
                print(f"IncompleteRead error. Retrying... (attempt {retry_count}/{max_retries})")
            except urllib.error.HTTPError as err:
                if err.code != 429:
                    print(err)
                    break

                #exponential backoff time
                delay = self._backoff_delay(retry_count)
                print(f"HTTP Error 429: sleeping for {delay} seconds!!")

                time.sleep(delay)
                retry_count += 1
            except Exception as err:
                print(err)
                break

        return best_partial

    def getFileID(self, file_name, parent_id=None, url=False):
        """
            Function to get file id by name, if file is not present, return None

            file_name: title of the file (folders are ignored)
            parent_id: optional id of the folder to search in
            url: return the file's alternateLink instead of its id
        """
        query = self._build_query(file_name, is_folder=False, parent_id=parent_id)
        return self._first_match(query, url=url)

    def getFolderID(self, folder_name, parent_id=None, url=False):
        """
            Function to get folder id by name, if folder is not present, return None

            folder_name: title of the folder
            parent_id: optional id of the folder to search in
            url: return the folder's alternateLink instead of its id
        """
        query = self._build_query(folder_name, is_folder=True, parent_id=parent_id)
        return self._first_match(query, url=url)

    def createFolder(self, folder_name, parent_id=None):
        """
            Function to create a folder under a folder (if parent_id is present), return folder_id

            An existing folder with the same title (and parent) is reused
            instead of creating a duplicate.
        """
        #check if folder exists
        folder_id = self.getFolderID(folder_name, parent_id)
        if folder_id:
            return folder_id

        # folder not present, create the folder
        folder_metadata = {
            'title': folder_name,
            'mimeType': self._FOLDER_MIME_TYPE,
        }
        if parent_id:
            folder_metadata['parents'] = [{'id': parent_id}]

        folder = self.drive.CreateFile(folder_metadata)
        folder.Upload()

        return folder['id']

    def uploadFile(self, file_name, file_path=None, file_url=None, parent_id=None):
        """
            Function to upload File,

            file_name: Name of the file to upload
            file_path: Upload the file on file path
            file_url: Upload a file from a url
            parent_id: Parent Folder Id

            Either Filepath or File_url is required to upload, if both present, picks file_path over file_url

            Returns: the alternateLink of the uploaded file, or None when
                     nothing could be downloaded from file_url
            Raises: ValueError when neither file_path nor file_url is given
        """
        from io import BytesIO

        if not file_path and not file_url:
            raise ValueError("Required either file_path or file_url")

        # file metadata to upload
        file_metadata = {
            'title': file_name,
            'parents': [{'id': parent_id}] if parent_id else [],
        }

        if file_path:
            #upload a file from file_path
            mime_type = mimetypes.guess_type(file_path)[0]
            content = None
        else:
            #upload a file loaded from the url
            remote_file = self._download(file_url)

            #if no content, return None
            if not remote_file:
                return None

            mime_type = mimetypes.guess_type(file_name)[0]
            # drive reads the content from a file-like object
            content = BytesIO(remote_file)

        # 'mimeType' is the metadata key PyDrive2 reads; leave it unset when
        # the type is unknown so the library applies its own default
        if mime_type:
            file_metadata['mimeType'] = mime_type

        media = self.drive.CreateFile(metadata=file_metadata)
        if file_path:
            media.SetContentFile(filename=file_path)
        else:
            media.content = content

        try:
            #upload the file
            media.Upload()
        finally:
            # SetContentFile opens the local file and never closes it
            if file_path and media.content is not None:
                media.content.close()

        print(f"File '{file_name}' has been uploaded to Google Drive.")

        #return the web link
        return media['alternateLink']

def guessMIMEType(file_path = None, memory_file=None):
    """
        Guess the MIME type of a file.

        file_path: path (or name) of the file; the type is guessed from its extension
        memory_file: readable, seekable file object held in memory; the type is
                     detected from its content and the object is rewound to the
                     start afterwards

        If both are given, file_path wins.

        Returns: the MIME type, or 'application/octet-stream' when it cannot be
                 determined (unknown extension, no input, or any error, which is
                 printed)
    """
    fallback = 'application/octet-stream'
    try:
        #return the mimetype for filepath
        if file_path:
            # guess_type returns None for unknown extensions
            return mimetypes.guess_type(file_path)[0] or fallback

        #find the mimetype for file stored in memory
        if memory_file:
            mime = magic.Magic(mime=True)
            try:
                return mime.from_buffer(memory_file.read())
            finally:
                # Reset the file pointer even when detection fails, so the
                # caller can still read the content from the start
                memory_file.seek(0)

        raise ValueError("Required either file_path or memory_file")
    except Exception as err:
        print(err)
        return fallback


In [None]:
import pandas as pd

# Load the product dataset back from the csv export
df = pd.read_csv(
    filepath_or_buffer=r"C:\Users\moink\Downloads\network-security-white-labelling\amazon_products_dataset.csv",
)

In [None]:
# Upload the image of every product row to the shared folder, skipping the
# ones that already exist there under the same file name.
for _row_index, product in df.iterrows():
    # file name pattern: <id>_<name with spaces turned into dashes>.jpg
    dashed_name = product['name'].replace(' ', '-')
    image_name = f"{product['id']}_{dashed_name}.jpg"

    # look the file up first so an existing image is not uploaded twice
    file_id = drive.getFileID(file_name=image_name, parent_id=shared_folder_id)
    if file_id:
        continue

    # not there yet: fetch it from the product's image url and upload it
    file_id = drive.uploadFile(
        file_name=image_name,
        file_url=product['image_url'],
        parent_id=shared_folder_id,
    )
