# In [2]:
#Imports
from datetime import datetime
import pandas as pd
from enum import Enum
#Imports for Scraping
import selenium
from selenium import webdriver
from selenium.webdriver.firefox.service import Service as FirefoxService
from webdriver_manager.firefox import GeckoDriverManager
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException
from selenium.common.exceptions import WebDriverException
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from os import path
import time
import sys


#Creator's Note: Products(Enum) and ProductsLoader are probably the only classes you need to edit
#unless you need to change the way the data is cleaned, which is handled in the DataCleaner class

#This class is here so that we can expand to different products more easily, making the spider more dynamic and expandable
class Products(Enum):
    """Registry of the products the spider scrapes.

    Every member's value is a tuple ``(index, urls, xpaths)``. ``index`` is the
    position of the product's DataFrame in the spider's frame list; ``urls``
    and ``xpaths`` start out empty and are filled in by ProductsLoader through
    ``addToProduct``.
    """
    #Add products like this ProductName = index iteration, [], []
    #the 2 empty lists will be filled in using the ProductsLoader class
    Bacon = 0, [], []
    Eggs = 1, [], []
    HeirloomTomatoes = 2, [], []

    def addToProduct(self, items, index):
        """Append ``items`` to this product's url or xpath list, skipping duplicates.

        Args:
            items: iterable of hashable entries (url strings or xpath tuples).
            index: which list to extend. Either the tuple position of the list
                (1 for urls, 2 for xpaths) or one of the case-insensitive
                strings 'url', 'urls', 'xpath', 'xpaths'.

        Raises:
            ValueError: if ``index`` does not select one of the two lists
                (unknown string, position 0, out-of-range position, wrong type).
        """
        target = None
        if isinstance(index, str):
            key = index.lower()
            if key in ('urls', 'url'):
                target = self.value[1]
            elif key in ('xpaths', 'xpath'):
                target = self.value[2]
        elif isinstance(index, int):
            try:
                target = self.value[index]
            except IndexError:
                target = None
        # Position 0 holds the integer frame index, not a list, so it must be
        # rejected here instead of failing later with an unrelated TypeError.
        if not isinstance(target, list):
            raise ValueError(f"Invalid index input for ({index}) for input: {items}")
        #Sets are fast at finding dups so we use them for speed
        seen = set(target)
        for item in items:
            if item not in seen:
                target.append(item)
                seen.add(item)

#This class loads the xpaths and urls to the Products Enum and adds dataframes to the spider
class ProductsLoader():
    """Fills the Products enum with urls and xpaths and builds the result DataFrames.

    Instantiating the class runs all three loaders. Afterwards ``DataFrames``
    holds one empty frame per product, ordered by the product's index
    (the first element of each Products value).
    """
    DataFrames = []

    def __init__(self):
        self.dataFrameAdder()
        self.urlsAdder()
        self.xpathMaker()

    #This adds the dataframe to the spider on load
    def dataFrameAdder(self):
        """Create one empty DataFrame per product.

        The columns follow, position by position, the values that
        DataCleaner.cleanUp returns for that product (13 for Bacon and Eggs,
        14 for Heirloom Tomatoes). A row is assigned as a plain list, so the
        column count must equal the length of the cleaned row.
        """
        #Dataframes (You can add more here)
        baconFrame = pd.DataFrame(columns=['Product Type', 'Current Price', 'Original Price',
                                           'Weight in lbs', 'True Weight', 'Brand', 'Local',
                                           'Address', 'State', 'City', 'Zip Code',
                                           'Date Collected', 'Url'])
        eggFrame = pd.DataFrame(columns=['Product Type', 'Current Price', 'Original Price',
                                         'Amount in dz', 'True Amount', 'Brand', 'Local',
                                         'Address', 'State', 'City', 'Zip Code',
                                         'Date Collected', 'Url'])
        tomatoFrame = pd.DataFrame(columns=['Product Type', 'Current Price', 'Original Price',
                                            'Weight in lbs', 'True Weight', 'Brand', 'Organic',
                                            'Local', 'Address', 'State', 'City', 'Zip Code',
                                            'Date Collected', 'Url'])
        self.DataFrames = [baconFrame,
                           eggFrame,
                           tomatoFrame
                          ]

    #Adding Urls to products
    def urlsAdder(self):
        """Register the product page urls on each Products member."""
        BaconUrls = ['https://www.hy-vee.com/aisles-online/p/11315/Hormel-Black-Label-Thick-Cut-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/47128/Hormel-Black-Label-Fully-Cooked-Original-Thick-Cut-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/41626/Applegate-Naturals-Uncured-Sunday-Bacon-Hickory-Smoked',
                     'https://www.hy-vee.com/aisles-online/p/57278/HyVee-Double-Smoked-Thick-Sliced-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/2405550/Applegate-Naturals-No-Sugar-Uncured-Bacon-Hickory-Smoked',
                     'https://www.hy-vee.com/aisles-online/p/57279/HyVee-Sweet-Smoked-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/11366/Hormel-Black-Label-Original-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/2455081/Jimmy-Dean-Premium-Hickory-Smoked-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/3595492/Farmland-Bacon-Double-Smoked-Double-Thick-Cut',
                     'https://www.hy-vee.com/aisles-online/p/47117/Hormel-Black-Label-Center-Cut-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/57277/HyVee-Center-Cut-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/2199424/Country-Smokehouse-Thick-Applewood-Slab-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/77228/Hormel-Black-Label-Original-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/21239/Farmland-Naturally-Hickory-Smoked-Classic-Cut-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/2456254/Jimmy-Dean-Premium-Applewood-Smoked-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/21240/Farmland-Naturally-Hickory-Smoked-Thick-Cut-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/47159/Hormel-Black-Label-Original-Bacon-4Pk',
                     'https://www.hy-vee.com/aisles-online/p/50315/Oscar-Mayer-Naturally-Hardwood-Smoked-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/50321/Oscar-Mayer-Center-Cut-Original-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/50316/Oscar-Mayer-Thick-Cut-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/2199421/Country-Smokehouse-Thick-Hickory-Smoked-Slab-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/104466/Hickory-Country-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/23975/HyVee-Hickory-House-Applewood-Naturally-Smoked-Thick-Sliced-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/23949/HyVee-Sweet-Smoked-Thick-Sliced-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/23963/HyVee-Fully-Cooked-Hickory-Smoked-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/11173/Hormel-Black-Label-Applewood-Thick-Cut-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/21317/Farmland-Naturally-Applewood-Smoked-Classic-Cut-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/21238/Farmland-Naturally-Hickory-Smoked-Thick-Cut-Bacon-Package',
                     'https://www.hy-vee.com/aisles-online/p/23948/HyVee-Lower-Sodium-Sweet-Smoked-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/458259/Wright-Naturally-Hickory-Smoked-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/11384/Hormel-Natural-Choice-Uncured-Original-Bacon-12-oz',
                     'https://www.hy-vee.com/aisles-online/p/2476490/Jimmy-Dean-FC-Hickory-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/1646677/Smithfield-Hometown-Original-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/53849/Farmland-Naturally-Hickory-Smoked-Lower-Sodium-Classic-Cut-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/47121/Hormel-Black-Label-Maple-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/164627/Oscar-Mayer-Fully-Cooked-Original-Bacon-252-oz-Box',
                     'https://www.hy-vee.com/aisles-online/p/23974/HyVee-Hickory-House-Hickory-Smoked-Thick-Sliced-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/50319/Oscar-Mayer-Selects-Smoked-Uncured-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/2471760/Jimmy-Dean-FC-Applewood-Smoked-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/16239/Oscar-Mayer-Center-Cut-Thick-Sliced-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/2214511/Hormel-Black-Label-Original-Thick-Cut-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/1008152/Wright-Naturally-Smoked-Applewood-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/1813260/Smithfield-Naturally-Hickory-Smoked-Thick-Cut-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/23976/HyVee-Hickory-House-Peppered-Naturally-Smoked-Thick-Sliced-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/21320/Farmland-Naturally-Applewood-Smoked-Thick-Cut-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/21253/Farmland-Naturally-Hickory-Smoked-Extra-Thick-Cut-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/1255920/Hormel-Black-Label-Cherrywood-Thick-Cut-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/57304/HyVee-Blue-Ribbon-Maple-Naturally-Smoked-Thick-Sliced-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/21252/Farmland-Naturally-Hickory-Smoked-30-Less-Fat-Center-Cut-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/2501872/Bourbon-And-Brown-Sugar-Slab-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/2516586/Hormel-Natural-ChoiceOriginal-Thick-Cut-Uncured-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/21319/Farmland-Naturally-Hickory-Smoked-Double-Smoked-Classic-Cut-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/317829/Des-Moines-Bacon-And-Meat-Company-Hardwood-Smoked-Uncured-Country-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/1255919/Hormel-Black-Label-Jalapeno-Thick-Cut-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/3538865/Oscar-Mayer-Bacon-Thick-Cut-Applewood',
                     'https://www.hy-vee.com/aisles-online/p/317830/Des-Moines-Bacon-And-Meat-Company-Applewood-Smoked-Bacon',
                     'https://www.hy-vee.com/aisles-online/p/3308731/Oscar-Mayer-Natural-Fully-Cooked-Uncured-Bacon'
                    ]
        EggUrls = ['https://www.hy-vee.com/aisles-online/p/57236/HyVee-Grade-A-Large-Eggs',
                   'https://www.hy-vee.com/aisles-online/p/23899/HyVee-Grade-A-Large-Eggs',
                   'https://www.hy-vee.com/aisles-online/p/715446/Farmers-Hen-House-Free-Range-Organic-Large-Brown-Grade-A-Eggs',
                   'https://www.hy-vee.com/aisles-online/p/2849570/Thats-Smart-Large-Shell-Eggs',
                   'https://www.hy-vee.com/aisles-online/p/31351/Farmers-Hen-House-Free-Range-Grade-A-Large-Brown-Eggs',
                   'https://www.hy-vee.com/aisles-online/p/23900/HyVee-Grade-A-Extra-Large-Eggs',
                   'https://www.hy-vee.com/aisles-online/p/71297/Egglands-Best-Farm-Fresh-Grade-A-Large-Eggs',
                   'https://www.hy-vee.com/aisles-online/p/36345/Egglands-Best-Grade-A-Large-Eggs',
                   'https://www.hy-vee.com/aisles-online/p/3192325/HyVee-Free-Range-Large-Brown-Egg-Grade-A',
                   'https://www.hy-vee.com/aisles-online/p/23903/HyVee-Grade-A-Jumbo-Eggs',
                   'https://www.hy-vee.com/aisles-online/p/3192323/HyVee-Cage-Free-Large-Brown-Egg-Grade-A',
                   'https://www.hy-vee.com/aisles-online/p/36346/Egglands-Best-Cage-Free-Brown-Grade-A-Large-Eggs',
                   'https://www.hy-vee.com/aisles-online/p/3192322/HyVee-Cage-Free-Large-Brown-Egg-Grade-A',
                   'https://www.hy-vee.com/aisles-online/p/858343/HyVee-Cage-Free-Omega3-Grade-A-Large-Brown-Eggs',
                   'https://www.hy-vee.com/aisles-online/p/1901565/Farmers-Hen-House-Pasture-Raised-Organic-Grade-A-Large-Brown-Eggs',
                   'https://www.hy-vee.com/aisles-online/p/60364/HyVee-HealthMarket-Organic-Grade-A-Large-Eggs',
                   'https://www.hy-vee.com/aisles-online/p/71298/Egglands-Best-Extra-Large-Eggs',
                   'https://www.hy-vee.com/aisles-online/p/23902/HyVee-Grade-A-Extra-Large-Eggs',
                   'https://www.hy-vee.com/aisles-online/p/453006/Egglands-Best-XL-Eggs',
                   'https://www.hy-vee.com/aisles-online/p/2668550/HyVee-One-Step-Pasture-Raised-Large-Brown-Eggs',
                   'https://www.hy-vee.com/aisles-online/p/66622/Farmers-Hen-House-Jumbo-Brown-Eggs',
                   'https://www.hy-vee.com/aisles-online/p/3274825/Nellies-Eggs-Brown-Free-Range-Large',
                   'https://www.hy-vee.com/aisles-online/p/57235/HyVee-Grade-A-Medium-Eggs',
                   'https://www.hy-vee.com/aisles-online/p/2437128/Pete-And-Gerrys-Eggs-Organic-Brown-Free-Range-Large',
                   'https://www.hy-vee.com/aisles-online/p/36347/Egglands-Best-Organic-Cage-Free-Grade-A-Large-Brown-Eggs',
                   'https://www.hy-vee.com/aisles-online/p/2698224/Nellies-Free-Range-Eggs-Large-Fresh-Brown-Grade-A',
                   'https://www.hy-vee.com/aisles-online/p/57237/HyVee-Grade-A-Large-Eggs',
                   'https://www.hy-vee.com/aisles-online/p/190508/Farmers-Hen-House-Organic-Large-Brown-Eggs'
                  ]
        HeirloomTomatoesUrls = ['https://www.hy-vee.com/aisles-online/p/37174/']

        Products.Bacon.addToProduct(BaconUrls,'urls')
        Products.Eggs.addToProduct(EggUrls,'urls')
        Products.HeirloomTomatoes.addToProduct(HeirloomTomatoesUrls,'urls')

    #This handles the xpaths by adding to the Products class
    #Most websites have similar xpaths for each item. You might need to make different xpaths
    #for each item if that is not the case.
    #When assigning xpaths mark them as optional if the element may or may not be present on the page.
    #We do this for speed: if a missing element is marked as non optional the item is retried and then
    #skipped, which hurts performance.
    #Best practice is to list the optional xpaths last so it reduces the chances of skipping.
    #Note special cases do happen but they are extremely rare; a good indication of finding one
    #is by using the skipHandler method and tracking/watching the logs
    #IMPORTANT < -!- NOT ALL XPATHS ARE THE SAME FOR EACH PRODUCT -!->
    def xpathMaker(self):
        """Register the ``(xpath, optional)`` pairs on each Products member.

        The order of the pairs is the order of the values the spider collects
        for one page: name, price, previous price, weight.
        """
        #Add the xpaths here and mark if they are optional
        nameXpath = '//*[contains(@class, "product-details_detailsContainer")]/h1'
        priceXpath = '//*[contains(@class, "product-details_detailsContainer")]/p[1]'
        prevPriceXpath = '//*[contains(@class, "product-details_detailsContainer")]/p[2]'
        weightXpath = '//*[contains(@class, "product-details_detailsContainer")]/p[3]' # optional

        #xpath, Optional
        xpathList = [(nameXpath, False),
                     (priceXpath, False),
                     (prevPriceXpath, False),
                     (weightXpath, True)]

        Products.Bacon.addToProduct(xpathList,'xpath')
        Products.Eggs.addToProduct(xpathList,'xpath')
        Products.HeirloomTomatoes.addToProduct(xpathList,'xpath')

class DataCleaner():
    """Turns the raw strings scraped from one product page into a cleaned row.

    ``cleanUp`` is the entry point; the other methods are helpers that read
    and update ``self.Data``, the dict holding the row being built.
    """
    Data = {}

    def cleanUp(self, input, inputType, url):
        """Build the cleaned row for one scraped item.

        Args:
            input: list of scraped values in xpath order; missing optional
                values are None. Lists shorter than the highest index a
                product reads are padded with None.
            inputType: the product name (``Products.<member>.name``).
            url: the page the values were scraped from.

        Returns:
            The row as a list of values in ``self.Data`` insertion order, or
            None when ``inputType`` is not a known product.
        """
        self.productType = inputType
        # The branches below read up to index 5; pad so a product with fewer
        # xpaths yields None for the missing fields instead of an IndexError.
        fields = list(input) + [None] * max(0, 6 - len(input))
        collected = str(datetime.today().date())
        # NOTE(review): the index mapping in each branch does not obviously match the
        # xpath order registered in ProductsLoader.xpathMaker (name, price, previous
        # price, weight) - confirm against the live pages.
        if self.productType == Products.Bacon.name:
            self.Data = {'Product Type': fields[0],
                         'Current Price': fields[1],
                         'Orignal Price': fields[3],
                         'Weight in lbs': None,
                         'True Weight': fields[2],
                         'Brand': None,
                         'Local': None,
                         'Address': None,
                         'State': None,
                         'City': None,
                         'Zip Code': None,
                         'Date Collected': collected,
                         'Url': url
                        }
            if self.Data['True Weight'] is None:
                self.Data['True Weight'] = self.findWeight()
            if self.Data['True Weight'] is not None:
                self.Data['Weight in lbs'] = self.ozToLb(self.Data['True Weight'])
        elif self.productType == Products.Eggs.name:
            self.Data = {'Product Type': fields[0],
                         'Current Price': fields[1],
                         'Orignal Price': fields[2],
                         'Amount in dz': None,
                         'True Amount': fields[4],
                         'Brand': fields[3],
                         'Local': None,
                         'Address': None,
                         'State': None,
                         'City': None,
                         'Zip Code': None,
                         'Date Collected': collected,
                         'Url': url
                        }
            self.eggConverter()
        elif self.productType == Products.HeirloomTomatoes.name:
            self.Data = {'Product Type': fields[0],
                         'Current Price': fields[1],
                         'Orignal Price': fields[2],
                         'Weight in lbs': None,
                         'True Weight': None,
                         'Brand': fields[3],
                         'Organic': None,
                         'Local': None,
                         'Address': None,
                         'State': None,
                         'City': None,
                         'Zip Code': None,
                         'Date Collected': collected,
                         'Url': url
                        }
            self.tomatoModification(fields[4], fields[5])
        else:
            return None
        self.setLocationalData()
        self.determineLocality()
        self.cleanPricing()
        return list(self.Data.values())

    #Helper to reduce code. Splits the string and returns the float value
    def stringValueExtraction(self, string, stringType):
        """Return the number written before ``stringType`` in ``string``.

        All digits and dots in the text before the first occurrence of
        ``stringType`` are joined and parsed as a float. Returns None when
        there are no such characters or they do not form a valid number.
        """
        prefix = string.split(stringType)[0]
        number = ''.join(ch for ch in prefix if ch.isdigit() or ch == '.')
        if not number:
            return None
        try:
            return float(number)
        except ValueError:
            # e.g. a lone '.' left over from an abbreviation
            return None

    def _priceToFloat(self, text):
        """Parse the digits and dots of ``text`` as a float; None if impossible."""
        if text is None:
            return None
        number = ''.join(ch for ch in str(text) if ch.isdigit() or ch == '.')
        if not number:
            return None
        try:
            return float(number)
        except ValueError:
            return None

    def cleanPricing(self):
        """Convert the two price fields of ``self.Data`` to floats.

        When the current price cannot be parsed both fields are left as they
        are. When the original price is missing or cannot be parsed it is set
        to the current price.
        """
        current = self._priceToFloat(self.Data['Current Price'])
        if current is None:
            return
        self.Data['Current Price'] = current
        original = self._priceToFloat(self.Data['Orignal Price'])
        self.Data['Orignal Price'] = current if original is None else original

    def ozToLb(self, input):
        """Convert a weight text to pounds.

        Returns ounces / 16 for an 'oz' text, the number before '/lb' (1.0 when
        there is none) for a '/lb' text, and the number before 'lb' for an 'lb'
        text. Returns None when a unit is present but no number precedes it,
        and the lowercased text itself when no known unit is found.
        """
        weight = str(input).lower()
        if 'oz' in weight:
            ounces = self.stringValueExtraction(weight, 'oz')
            # "oz" can occur inside a word (e.g. "frozen") with no number before it
            return None if ounces is None else ounces / 16.0
        if '/lb' in weight:
            pounds = self.stringValueExtraction(weight, '/lb')
            return 1.0 if pounds is None else pounds
        if 'lb' in weight:
            return self.stringValueExtraction(weight, 'lb')
        return weight

    #Tomatoes are tricky so we have a function that does this part
    def tomatoModification(self, byWeight, size):
        """Fill in the 'Organic', 'True Weight' and 'Weight in lbs' tomato fields.

        ``size`` wins over ``byWeight``; when both are None the price and name
        fields are searched for a '/ea' or '/lb' marker.
        """
        #We can extract Organic from the name
        if self.Data['Organic'] is None:
            name = self.Data['Product Type'] or ''
            if 'organic' in name.lower().replace(' ', ''): # convert to lowercase and remove spaces
                self.Data['Organic'] = 'Organic'
        #This part is for Weight
        if size is not None:
            self.Data['True Weight'] = size
        elif byWeight is not None:
            self.Data['True Weight'] = byWeight
        elif self.Data['True Weight'] is None:
            #Checking these places for clues
            checkLocations = [self.Data['Current Price'],
                              self.Data['Product Type'],
                              self.Data['Orignal Price']]
            for text in checkLocations:
                if text is None:
                    # e.g. no original price on the page
                    continue
                if '/ea' in text:
                    self.Data['True Weight'] = f"{self.stringValueExtraction(text, '/ea')}/ea"
                    break
                elif '/lb' in text:
                    weight = self.stringValueExtraction(text, '/lb')
                    self.Data['True Weight'] = f"{weight}/lb"
                    self.Data['Weight in lbs'] = 1.0
                    return
            return
        if '/lb' in self.Data['True Weight']:
            self.Data['Weight in lbs'] = 1.0

    #If no weight is given we look at other places that could have what we need
    #This determines if a listing is talking about weights in ounces or pounds.
    def findWeight(self):
        """Search the price and name fields for a weight.

        Returns a text such as '12.0 oz' or '1.0 lb', or None when no field
        contains a unit with a usable number in front of it.
        """
        # (marker to look for, unit to report); None means "priced per pound"
        markers = (('pound', 'lb'), ('ounce', 'oz'), ('/lb', None), ('lb', 'lb'), ('oz', 'oz'))
        #Checking these places for clues
        checkLocations = [self.Data['Current Price'], self.Data['Product Type'], self.Data['Orignal Price']]
        for text in checkLocations:
            if text is None:
                continue
            text = text.lower().replace(' ', '') # convert to lowercase and remove spaces
            for marker, unit in markers:
                if marker not in text:
                    continue
                if unit is None:
                    return f"{1.0} lb"
                amount = self.stringValueExtraction(text, marker)
                # A marker without a number (e.g. "oz" in "frozen") is not a weight; keep looking
                if amount is not None:
                    return f"{amount} {unit}"
        return None

    #Eggs don't have weight so we use amount
    def eggConverter(self):
        """Fill in 'True Amount' and 'Amount in dz' for an egg row.

        When no amount was scraped the name and price fields are searched for
        'dozen', 'dz' or 'ct'; otherwise the scraped amount text is converted.
        """
        if self.Data['True Amount'] is None:
            checkLocations = [self.Data['Product Type'],self.Data['Current Price'],self.Data['Orignal Price']]
            for text in checkLocations:
                if text is None:
                    continue
                text = text.lower().replace(' ', '') # convert to lowercase and remove spaces
                if 'dozen' in text:
                    amount = self.stringValueExtraction(text, 'dozen')
                    if amount is None:
                        # "dozen" without a number means one dozen
                        self.Data['True Amount'] = f"{1} dz"
                        self.Data['Amount in dz'] = 1.0
                        return
                    self.Data['True Amount'] = f"{amount} dz"
                    self.Data['Amount in dz'] = amount
                    return
                if 'dz' in text:
                    amount = self.stringValueExtraction(text, 'dz')
                    if amount is not None:
                        self.Data['True Amount'] = f"{amount} dz"
                        self.Data['Amount in dz'] = amount
                        return
                if 'ct' in text:
                    amount = self.stringValueExtraction(text, 'ct')
                    # "ct" also occurs inside words such as "select"; ignore it without a number
                    if amount is not None:
                        self.Data['True Amount'] = f"{amount} ct"
                        self.Data['Amount in dz'] = amount / 12
                        return
        else:
            text = self.Data['True Amount'].lower().replace(' ', '')
            if 'dozen' in text:
                amount = self.stringValueExtraction(text, 'dozen')
                if amount is None:
                    self.Data['Amount in dz'] = 1.0
                    return
                self.Data['Amount in dz'] = amount
            if 'dz' in text:
                self.Data['Amount in dz'] = self.stringValueExtraction(text, 'dz')
            if 'ct' in text:
                count = self.stringValueExtraction(text, 'ct')
                if count is not None:
                    self.Data['Amount in dz'] = count / 12

    def determineLocality(self):
        """Set ``self.Data['Local']`` from the brand (or the name when no brand is set).

        Leaves the field untouched for unknown product types.
        """
        #For speed we use sets and turn everything to lowercase and no spaces for accuracy
        if self.productType == Products.Bacon.name:
            localBrands = set()
            nonlocalBrands = set()
        elif self.productType == Products.Eggs.name:
            localBrands = set()
            nonlocalBrands = set()
        elif self.productType == Products.HeirloomTomatoes.name:
            localBrands = set()
            nonlocalBrands = set()
            #Sometimes it says it in the name
            name = self.Data['Product Type'] or ''
            if 'local' in name.lower().replace(' ', ''): # convert to lowercase and remove spaces
                self.Data['Local'] = 'Local'
                return
        #Add product brands here
        else:
            #Catch if no local/nonlocal brands are set for the product
            return
        #If no brand was set we look at the product name
        source = self.Data['Brand'] if self.Data['Brand'] is not None else self.Data['Product Type']
        brand = (source or '').lower().replace(' ', '')
        if brand in localBrands:
            self.Data['Local'] = "Local"
        elif brand in nonlocalBrands:
            self.Data['Local'] = "Non-local"
        else:
            self.Data['Local'] = "None Listed"

    #For Hyvee there are multiple stores however I could not find a way to check each store
    #This is something to be developed (improved upon) in the future
    def setLocationalData(self):
        """Mark every store-location field of ``self.Data`` as 'NA'."""
        for field in ('Address', 'State', 'City', 'Zip Code'):
            self.Data[field] = 'NA'

# class DataCleaner():
#     DataArray = []
#     def cleanUp(self, item, url):
#         self.DataArray = item
#         if self.DataArray[3] == None:
#             self.swap_elements(2, 3)
#         self.DataArray.append(url)
#         return self.DataArray
    
#     def swap_elements(self, idx1, idx2):
#         # Make a copy of the input list to avoid modifying it
#         new_lst = self.DataArray.copy()
#         # Swap the elements at the two indices
#         new_lst[idx1], new_lst[idx2] = new_lst[idx2], new_lst[idx1]
#         self.DataArray = new_lst

class HyveeSpider():
    name = "Hyvee"  #The store name 
    spiderLogs = []         #The logs 
    skipped = []            #Skipped data 

    #These are helper methods that are available for your convenience
    def log(self, *args):
        self.spiderLogs.append(('Logger:', args))
        if self.LOGGER:
            print('Logger:', *args)

    def debug(self, *args):
        self.spiderLogs.append(('Debug:', args))
        if self.DEBUGGER:
            print('Debug:', *args)
    
    def printer(self, *args):
        self.spiderLogs.append(('Printer:', args))
        print(*args)
    
    def printLogs(self):
        print("\n< --- Printing Logs --- >\n")
        for entry in self.spiderLogs:
            print(*entry)

    def Logs_to_file(self, filename):
        with open(filename, 'w') as file:
            for log_entry in self.spiderLogs:
                file.write('{} {}\n'.format(log_entry[0], log_entry[1]))
    
    def __init__(self):
        """Set the spider's default settings and start the Firefox WebDriver."""
        # Output switches
        self.DEBUGGER = False #The debugger switch to see whats going on. The Default is False
        self.LOGGER = False #When you need to see everything that happens. The Default is False
        # Retry and timing settings
        self.attempts = 3 #The number of attempts the spider can retry if an error occurs. Default is 3
        self.waitTime = 10 #The number of seconds WebDriver will wait. Default is 10
        self.maxRetryCount = 100 #Number of retries the javascript can make. Default is 100
        # Run state
        self.count = 0 #This saves the location of the url we are going through
        self.runTime = 0 #Total time of extractions
        self.totalRecoveries = 0 #Number of recoveries made while running
        self.cleaner = DataCleaner() #Loads the cleaner
        #Selenium needs a webdriver to work. I chose Firefox however you can use another if you need to
        geckoPath = GeckoDriverManager().install()
        firefoxService = FirefoxService(geckoPath, log_path=path.devnull)
        self.driver = webdriver.Firefox(service=firefoxService)
        self.log("Driver started")
    
    #This handles the restart in case we run into an error
    def restart(self):
        """Quit the current WebDriver and start a fresh Firefox instance."""
        self.driver.quit()
        geckoPath = GeckoDriverManager().install()
        firefoxService = FirefoxService(geckoPath, log_path=path.devnull)
        self.driver = webdriver.Firefox(service=firefoxService)
        self.log("Driver restarted")
    
    #This starts the spider
    def start_requests(self):
        """Run the whole scrape: load products, extract each one, export the results.

        Writes one CSV per product and a log file, all prefixed with today's
        date and the store name, then hands any skipped items to skipHandler.
        The WebDriver is quit when the run ends, including when it ends with
        an exception, so no browser process is left behind.
        """
        try:
            self.runTime = time.time()
            self.log("Loading from ProductsLoader Class")
            load = ProductsLoader() #Loads all products
            self.dataFrames = load.DataFrames #Adds all dataframes
            self.debug("Products Loaded and Data Frames Added")
            self.debug('\n < --- Setup runtime is %s seconds --- >' % (time.time() - self.runTime))
            self.totalRecoveries = 0
            #Sweeps through all products
            for product in Products:
                self.requestExtraction(product)
            #Adds the date that the data was scraped; the trailing space separates
            #the date from the store name in the file names
            currentDate = datetime.today().strftime('%Y-%m-%d') + ' '
            self.log("Exporting files")
            #Dataframes to CSV files
            for df, product in zip(self.dataFrames, Products):
                df.to_csv(currentDate + self.name +" " + product.name + ".csv")
                self.log('\n', df.to_string())
            self.debug('\n < --- Total runtime took %s seconds with %d recoveries --- >' % (time.time() - self.runTime, self.totalRecoveries))
            if len(self.skipped) != 0:
                self.debug('\n < -!- WARNING SKIPPED (' + str(len(self.skipped)) + ') DATA FOUND --->')
            self.Logs_to_file(currentDate + self.name + ' Spider Logs.txt')
            if len(self.skipped) > 0:
                self.debug(self.skipped)
                self.skipHandler(currentDate)
        finally:
            self.driver.quit()

    #This handles the extraction request for the inputed product 
    def requestExtraction(self, product):
        self.count = 0
        errors = 0
        start = time.time()
        self.debug("Starting "+ product.name)    
        for trying in range(self.attempts):
            try:
                self.makeRequest(product)
                self.debug(product.name + " Finished")    
                self.log('\n< --- ' + product.name + ' scrape took %s seconds with %d recoveries --- >\n' % ((time.time() - start), errors))
                self.totalRecoveries += errors
                return self.totalRecoveries
            except Exception as e:
                #Note sometimes the browser will closed unexpectedly and theres not we can do but restart the driver
                errors += 1
                self.debug("An error occurred:", e)
                self.debug("Recovering extraction and continueing")
                self.restart() 
        self.debug(product.name + " Did not Finished after " + str(self.attempts) + " Time wasted: %s seconds" % (time.time() - start))
        self.totalRecoveries += errors
        return self.totalRecoveries

    #This handles the reqests for each url and adds the data to the dataframe
    def makeRequest(self, product):
        """Scrape every remaining URL of ``product`` and store one row per URL.

        The loop cursor lives on the instance (``self.count``), so a later call
        continues at the first URL that has not been stored yet.

        For each URL every entry of ``product.value[2]`` is read through
        ``javascriptXpath``; an entry is ``[xpath, optional]`` or
        ``[xpath, optional, special]``:

        * a found text is kept as is,
        * a "special" entry that yields 'empty' or 'skip' is stored as None,
        * an optional entry that yields 'empty' is stored as None,
        * anything else is retried up to ``self.attempts`` times.

        When a required entry is still unresolved after all attempts the URL is
        recorded in ``self.skipped`` as ``[product, self.count, url]`` and a
        placeholder row of 'SKIPPED' values (plus the url) is stored instead.

        Args:
            product: Products member; ``value[0]`` selects the data frame,
                ``value[1]`` holds the URLs and ``value[2]`` the xpath entries.
        """
        productUrls = product.value[1]
        total = len(productUrls)
        while self.count < total:
            url = productUrls[self.count]
            self.driver.get(url)
            self.log("Making a request for: ", url)
            item = []
            skipped = False
            time.sleep(1) # marionette Error Fix
            for xpath in product.value[2]:
                #Retrying the xpath given the number of attempts
                for _ in range(self.attempts):
                    data = self.javascriptXpath(xpath[0])
                    if data not in {'empty', 'skip'}:  #Data found
                        item.append(data)
                        self.log(data + ' was added to the list for: ', url)
                        break
                    #special case in case you need it
                    #example would be when there actually is a '' in the xpath
                    if len(xpath) == 3 and xpath[2]:
                        self.debug("xpath marked as speical")
                        item.append(None)
                        break
                    #this is where setting the xpath to optional comes in
                    if xpath[1] and data == 'empty':
                        self.debug("xpath wasnt avaliable")
                        item.append(None)
                        break
                    self.debug("Missing item retrying")
                else:
                    #Only reached when no attempt ended in a break, i.e. every
                    #attempt failed (or no attempt was allowed at all)
                    skipped = True
                if skipped:  #To help clean the data we skip the item with gaps of data
                    self.debug("An Item has been skipped for: ", url)
                    #Taking the product, the position of the row and the url to retry for later
                    #This could take time to do so we do this at the very end after we made the csv files
                    self.skipped.append([product, self.count, url])
                    break
            frame = self.dataFrames[product.value[0]]
            if skipped:
                #No point in cleaning skipped items
                items = ['SKIPPED']*(frame.shape[1] - 1)
                items.append(url)
            else:
                #We call the DataCleaner class to handle the cleaning of the data
                #Its best to clean the data before we add it to the data frame
                self.debug('Data cleaning started: ', item)
                items = self.cleaner.cleanUp(item, url)
                self.debug('Data cleaning finished: ', item)
            self.debug('Extracted: ', items)
            #Appends the row at the next free position of the frame
            frame.loc[len(frame)] = items
            self.count += 1
            self.printer(product.name + " item added ", self.count, " of ", total, ":  ", items)

    #Collecting the data from the xpath in JavaScript is faster and results in fewer errors than doing it in python
    #This is where selenium shines because we can both use JavaScript and render JavaScript websites
    #and is the only reason why we use it instead of scrapy
    def javascriptXpath(self, xpath):
        """Return the trimmed text of the first node matching ``xpath``.

        First waits up to ``self.waitTime`` seconds for a matching element to be
        present, then reads the node's ``textContent`` through JavaScript. While
        the text is one of the invalid outputs the read is repeated, at most
        ``self.maxRetryCount`` times, because the page may still be loading.

        Args:
            xpath: XPath expression evaluated against the current document.

        Returns:
            The node text on success, 'skip' when the text stayed invalid for
            every read, or 'empty' when a TimeoutException was raised (the
            xpath is then assumed to be absent from the page).
        """
        # if the time expires it assumes xpath wasnt found in the page
        try:
            #Waits for page to load
            ignored_exceptions = (NoSuchElementException, StaleElementReferenceException)
            WebDriverWait(self.driver, self.waitTime, ignored_exceptions=ignored_exceptions).until(
                EC.presence_of_all_elements_located((By.XPATH, xpath)))

            # Runs the javascript and collects the text data from the inputed xpath
            # We want to keep repeating if we get any of these outputs becasue the page is still
            # loading and we dont want to skip or waste time. (for fast computers)
            retrycount = 0
            # Each value is its own element; a missing comma used to merge
            # 'skip' and "$nan" into the single string 'skip$nan'.
            invalidOutputs = {"error", "skip", "$nan", ""}
            text = ''  # keeps the final log valid even when no read is allowed
            while retrycount < self.maxRetryCount:
                text = self.driver.execute_script("""
                    const element = document.evaluate(arguments[0], document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
                    if (!element) {
                        return 'skip';
                    }
                    return element.textContent.trim();
                """,
                xpath)
                # Compare without spaces and case so variants like " NaN" match too
                checkText = text.replace(" ", "").lower()
                if checkText in invalidOutputs:
                    retrycount += 1
                else:
                    self.log(retrycount, "xpath attempts for (", text, ")")
                    return text
            self.log("xpath attempts count met. Problematic text (" + text + ") for ", xpath)
            return 'skip'
        except TimeoutException:
            self.log('Could not find xpath for: ', xpath)
            return 'empty'

           

    #This is here to hopefully fix skipped data
    #Best case scenario this will never be used
    def skipHandler(self, currentDate):
        """Retry every entry of ``self.skipped`` and overwrite its placeholder row.

        Each entry ends with ``[..., row position, url]`` and starts with the
        product (``makeRequest`` stores ``[product, self.count, url]``). An
        entry gets up to ``self.attempts*2`` page loads per pass. After every
        successful repair the product's frame and the logs are written to disk
        so finished repairs survive a later failure.

        Passes are repeated while they make progress; a pass that repairs
        nothing ends the handler so permanently missing data cannot keep it
        running forever.

        Args:
            currentDate: prefix used for the csv and log file names.

        Returns:
            Number of rows that were repaired.
        """
        corrections = 0
        while self.skipped:
            repairedThisPass = 0
            #Iterating over a copy so repaired entries can be removed on the go
            for dataSkip in list(self.skipped):
                product = dataSkip[0]
                #Counting from the end keeps this valid for entries with or
                #without a separate DataFrame number in front of the row position
                rowIndex = dataSkip[-2]
                url = dataSkip[-1]
                item = None
                #Limiting the Attempts to fix while avoiding bottlenecking the problem
                for _ in range(self.attempts*2):
                    item = self._extractSkippedItem(product, url)
                    if item is not None:
                        break
                if item is None:  #To help clean the data we skip the item with gaps of data
                    self.debug("Item still missing attempting other skipped for now")
                    continue
                items = self.cleaner.cleanUp(item, url)
                #The frame is picked through the product, the same way the csv below is
                #NOTE(review): assumes the stored position equals the row label used
                #when the placeholder row was added - confirm against makeRequest
                frame = self.dataFrames[product.value[0]]
                frame.loc[rowIndex] = items
                self.printer("Fixed " + product.name + " item: ", items)
                #To avoid infinite loops and never saving our data we save the file now
                #NOTE(review): the store name in the file name is hard coded - confirm it is intended
                frame.to_csv(currentDate + "REPAIRED Gateway Market " + product.name + ".csv")
                self.debug('\n < --- Total runtime with saving of repairs took %s seconds --- >' % (time.time() - self.runTime))
                self.Logs_to_file(currentDate + self.name + ' Spider REPAIR Logs.txt')
                #To avoid fixing fixed items we drop the entry
                self.skipped.remove(dataSkip)
                corrections += 1
                repairedThisPass += 1
            if repairedThisPass == 0:
                #Nothing could be fixed in a whole pass, trying again would loop forever
                self.debug("Could not repair " + str(len(self.skipped)) + " skipped item(s), giving up")
                break
        self.debug('\n < --- Total runtime with all repairs took %s seconds --- >' % (time.time() - self.runTime))
        self.Logs_to_file(currentDate + self.name + ' spider COMPLETED REPAIR Logs.txt')
        return corrections

    #Loads the url once and reads every xpath of the product for skipHandler
    def _extractSkippedItem(self, product, url):
        """Return the raw values for ``url``, or None if a required xpath stays unresolved.

        Every xpath entry is read up to ``self.attempts*2`` times. "Special"
        entries and optional entries that are 'empty' are stored as None, the
        same way ``makeRequest`` treats them.
        """
        self.driver.get(url)
        self.log("Making a request for: ", url)
        item = []
        for xpath in product.value[2]:
            for _ in range(self.attempts*2):
                data = self.javascriptXpath(xpath[0])
                if data not in {'empty', 'skip'}:  #Data found
                    item.append(data)
                    self.log(data + ' was added to the list for: ', url)
                    break
                if len(xpath) == 3 and xpath[2]:
                    self.debug("xpath marked as speical")
                    item.append(None)
                    break
                #this is where setting the xpath to optional comes in
                if xpath[1] and data == 'empty':
                    self.debug("xpath wasnt avaliable")
                    item.append(None)
                    break
                self.debug("Missing item retrying")
            else:
                #No read ended in a break, so this xpath is still missing
                return None
        return item

In [None]:
# Start
# DEBUG switch: when True the collected data frames and the logs are printed after the run
SHOW = True

# Spider setup
spider = HyveeSpider()
spider.LOGGER = True
spider.DEBUGGER = True

# Running the spider
spider.start_requests()

if SHOW:
    # Plain loop: the prints are side effects, so no throwaway list is built
    for dataFrame in spider.dataFrames:
        print(dataFrame)
    spider.printLogs()
