In [32]:
from datetime import datetime, timedelta
from json import loads, JSONDecodeError
from os import _exit, getenv, makedirs, path
from time import localtime, sleep, strftime, time

from dotenv import load_dotenv
from openpyxl import load_workbook
import pandas as pd
from seleniumwire import webdriver
from seleniumwire.utils import decode
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException, ElementNotInteractableException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

In [33]:
fb_mkp_ropa = {
    "Fecha Extraccion": [],
    "titulo_marketplace": [],
    "tiempo_creacion": [],
    "tipo_delivery": [],
    "delivery_data": [],
    "delivery_direccion": [],
    "descripcion": [],
    "disponible": [],
    "vendido": [],
    "fecha_union_vendedor": [],
    "cantidad": [],
    "precio": [],
    "tipo_moneda": [],
    "amount_with_concurrency": [],
    "latitud": [],
    "longitud": [],
    "locacion": [],
    "locacion_id": [],
    "name_vendedor": [],
    "tipo_vendedor": [],
    "id_vendedor": []
}

In [34]:
fb_mkp_ropa_time = {
    "Fecha" : None,
    "Hora Inicio": None,
    "Hora Termino": None,
    "Cantidad": None,
    "Tiempo(HHMMSS)": None,
    "Productos/min": None,
    "Enlace": None,
    "Observaciones": None,
}

In [35]:
class ScraperFb:
    """Representa a un bot para hacer web scarping en fb marketplace.

    Attributes:
        driver (Object): Maneja un navegador para hacer web scraping
        wait (Object): Maneja el Tiempo de espera durante la ejecución del bot
    """
    
    def __init__(self):
        """Inicializa un objeto de tipo ScraperFb.

        Args:
            driver (Object): [Driver]
            wait (Object): [Wait]
        """
        chrome_options = webdriver.ChromeOptions()
        prefs = {"profile.default_content_setting_values.notifications" : 2}
        chrome_options.add_experimental_option("prefs",prefs)
        self.driver = webdriver.Chrome(chrome_options=chrome_options,service=Service(ChromeDriverManager().install()))
        self.wait = WebDriverWait(self.driver, 10)

    def iniciar_sesion(self, url):
        """Inicia sesión en una página web usando un usuario y contraseña

        Args:
            url (str): [Url]
        """
        self.driver.get(url)
        self.driver.maximize_window()
        username = self.wait.until(EC.presence_of_element_located((By.ID, "email")))
        password = self.wait.until(EC.presence_of_element_located((By.ID, "pass")))
        username.clear()
        password.clear()
        username.send_keys(getenv('FB_USERNAME'))
        password.send_keys(getenv('FB_PASSWORD'))
        self.wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button[name='login']"))).click()
    
    def mapear_datos(self, url):
        sleep(10)
        self.driver.execute_script("window.open('about:blank', 'newtab');")
        self.driver.switch_to.window("newtab")
        self.driver.get(url)
        
        sleep(8)        
        ropa = self.driver.find_elements(By.XPATH, '//*[@class="xt7dq6l xl1xv1r x6ikm8r x10wlt62 xh8yej3"]')
        fecha_publicacion = fecha_extraccion = int(datetime.strptime(fb_mkp_ropa_time["Fecha"],"%d/%m/%Y").timestamp())
        fecha_flag = fecha_extraccion + 86400
        i=0
        e=0
        while fecha_publicacion >= fecha_extraccion:
            print("Scrapeando item", i + 1)
            try:
                ropa[i].click()
                sleep(5)
                for request in self.driver.requests:
                    if not request.response or 'graphql' not in request.url:
                        continue
                    
                    body = decode(request.response.body, request.response.headers.get('Content-Encoding', 'identity'))
                    decoded_body = body.decode('utf-8')
                    json_data = loads(decoded_body)
                    
                    if 'prefetch_uris_v2' not in json_data['extensions']:
                        continue

                    fecha_publicacion = json_data['data']['viewer']['marketplace_product_details_page']['target']['creation_time']
                    print(fecha_publicacion)
                    if fecha_publicacion < fecha_flag:
                        dato = json_data['data']['viewer']['marketplace_product_details_page']
                        print(dato["target"]["marketplace_listing_title"])
                        self.extraer_datos(dato, fb_mkp_ropa_time["Fecha"])
                    break
                self.driver.execute_script("window.history.go(-1)");
                
            except (NoSuchElementException, JSONDecodeError, StaleElementReferenceException) as error:
                print("Error:",error)
                print('No se hallo el item N '+str(i + 1)+'se pasará al siguiente')
                e=e+1
                
            except (KeyError, ElementNotInteractableException) as error:
                print("Error:",error)
                print('No se puede obtener la data del item N '+str(i + 1)+'se pasará al siguiente')
                e=e+1
                self.driver.execute_script("window.history.go(-1)")
                
            except Exception as error:
                print("Error:", error)
                e = e + 1
                self.guardar_datos()
                _exit(0)
            i = i + 1
            if i == len(ropa):
                self.driver.execute_script('window.scrollTo(0, document.body.scrollHeight)')
                sleep(7)
                ropa = self.driver.find_elements(By.XPATH, '//*[@class="xt7dq6l xl1xv1r x6ikm8r x10wlt62 xh8yej3"]')
            del self.driver.requests
            sleep(3)
            print('-------------------------------------------------------------------')
        fb_mkp_ropa_time["Cantidad"]= i - e
        print("Se halló", e, "errores")
        print('-------------------------------------------------------------------')
        print('Fin de la extraccion')
        print('-------------------------------------------------------------------')
    
    def extraer_datos(self, item, fecha_extraccion):
        fb_mkp_ropa["titulo_marketplace"].append(item['target'].get('marketplace_listing_title'))
        fb_mkp_ropa["tiempo_creacion"].append(item['target'].get('creation_time'))
        fb_mkp_ropa["disponible"].append(item['target'].get('is_live'))
        fb_mkp_ropa["vendido"].append(item['target'].get('is_sold'))
        fb_mkp_ropa["cantidad"].append(item['target'].get('listing_inventory_type'))
        fb_mkp_ropa["name_vendedor"].append(item['target'].get('story').get('actors')[0].get('name'))
        fb_mkp_ropa["tipo_vendedor"].append(item['target'].get('story').get('actors')[0]['__typename'])
        fb_mkp_ropa["id_vendedor"].append(item['target'].get('story').get('actors')[0]['id'])
        fb_mkp_ropa["locacion_id"].append(item['target'].get('location_vanity_or_id'))
        fb_mkp_ropa["latitud"].append(item['target'].get('location', {}).get('latitude'))
        fb_mkp_ropa["longitud"].append(item['target'].get('location', {}).get('longitude'))
        fb_mkp_ropa["precio"].append(item['target'].get('listing_price', {}).get('amount'))
        fb_mkp_ropa["tipo_moneda"].append(item['target'].get('listing_price', {}).get('currency'))
        fb_mkp_ropa["amount_with_concurrency"].append(item['target'].get('listing_price', {}).get('amount_with_offset_in_currency'))
        fb_mkp_ropa["tipo_delivery"].append(item['target'].get('delivery_types', [None])[0])
        fb_mkp_ropa["delivery_data"].append(item['target'].get("delivery_data", {}).get('carrier'))
        fb_mkp_ropa["delivery_direccion"].append(item['target'].get("delivery_data", {}).get('delivery_address'))
        fb_mkp_ropa["descripcion"].append(item['target'].get('redacted_description', {}).get('text'))
        fb_mkp_ropa["fecha_union_vendedor"].append(item['target'].get('marketplace_listing_seller', {}).get('join_time'))  
        data = item['target'].get('location_text', {})
        if data:
            data = data.get('text')
        fb_mkp_ropa["locacion"].append(data)
        fb_mkp_ropa["Fecha Extraccion"].append(fecha_extraccion)
    
    def guardar_datos(self):
        df_fb_mkp_ropa = pd.DataFrame(fb_mkp_ropa)
        df_fb_mkp_ropa.drop(len(df_fb_mkp_ropa)-1, axis=0, inplace=True)
        fb_mkp_ropa_time["Cantidad"] = len(df_fb_mkp_ropa)
        datetime_obj = datetime.strptime(fb_mkp_ropa_time["Fecha"],"%d/%m/%Y")
        filepath = "Data/" + datetime_obj.strftime('%d-%m-%Y') + "/"
        filename = "fb_ropa_" + datetime_obj.strftime('%d%m%Y') + "_" + str(fb_mkp_ropa_time["Cantidad"]) + ".xlsx"
        if not path.exists(filepath):
            makedirs(filepath)
        df_fb_mkp_ropa.to_excel(filepath + filename, index = False)
        print("Datos Guardados Correctamente")
        
    def guardar_tiempos(self, filename, sheet_name):
        tiempos = load_workbook(filename)
        header_exist = True
        if sheet_name not in [ws.title for ws in tiempos.worksheets]:
            tiempos.create_sheet(sheet_name)
            header_exist = False
        worksheet = tiempos[sheet_name]
        if not header_exist:
            worksheet.append(list(fb_mkp_ropa_time.keys()))
        worksheet.append(list(fb_mkp_ropa_time.values()))
        tiempos.save(filename)
        tiempos.close()
        print("Tiempos Guardados Correctamente")

In [36]:
def set_params_inicio():
    print("Estableciendo parámetros de inicio")
    fb_mkp_ropa_time["Fecha"] = (datetime.now().date() - timedelta(days=1)).strftime('%d/%m/%Y')
    start = time()
    fb_mkp_ropa_time["Hora Inicio"] = strftime("%H:%M:%S", localtime(start))
    print("Hora de inicio:",fb_mkp_ropa_time["Hora Inicio"])
    return start

def set_params_final(start):
    print("Estableciendo parámetros finales")
    end = time()
    fb_mkp_ropa_time["Hora Termino"] = strftime("%H:%M:%S", localtime(end))
    print("Hora Termino:",fb_mkp_ropa_time["Hora Termino"])
    total = end - start
    print("Duracion: ",total, 'seconds')
    fb_mkp_ropa_time["Tiempo(HHMMSS)"] = str(timedelta(seconds=total)).split(".")[0]
    fb_mkp_ropa_time["Productos/min"] = int(fb_mkp_ropa_time["Cantidad"]/(total / 60))

In [37]:
def main():
    # Cargar variables de entorno
    load_dotenv()
    
    # Estabbleciendo hora y fecha de inicio de la extracción
    start = set_params_inicio()
    
    # Url base a scrapear
    url_base = 'https://www.facebook.com/'
    url_ropa = '"https://www.facebook.com/marketplace/category/apparel/?sortBy=creation_time_descend&exact=false"'
    
    # Parámetros para guardar la medición de la ejecución del scraper
    filename_tiempos = 'Tiempos.xlsx'
    sheet_tiempos = "Ropa"
    
    scraper = ScraperFb()
    scraper.iniciar_sesion(url_base)
    scraper.mapear_datos(url_ropa)
    scraper.guardar_datos()
    
    set_params_final(start)
    scraper.guardar_tiempos(filename_tiempos, sheet_tiempos)

In [38]:
if __name__ == '__main__':
    main()

Hora de inicio: 03:45:32
Scrapeando item 1
Error: 'marketplace_product_details_page'
No se puede obtener la data del item N 1se pasará al siguiente
-------------------------------------------------------------------
Scrapeando item 2
1673103498
-------------------------------------------------------------------
Scrapeando item 3
1673103373
-------------------------------------------------------------------
Scrapeando item 4
1673103344
-------------------------------------------------------------------
Scrapeando item 5
1673103339
-------------------------------------------------------------------
Scrapeando item 6
1673103338
-------------------------------------------------------------------
Scrapeando item 7
1673103299
-------------------------------------------------------------------
Scrapeando item 8
1673103285
-------------------------------------------------------------------
Scrapeando item 9
1673103246
-------------------------------------------------------------------
Scrapean

1673101763
-------------------------------------------------------------------
Scrapeando item 75
1673101726
-------------------------------------------------------------------
Scrapeando item 76
1673101723
-------------------------------------------------------------------
Scrapeando item 77
1673101723
-------------------------------------------------------------------
Scrapeando item 78
1673101674
-------------------------------------------------------------------
Scrapeando item 79
1673101662
-------------------------------------------------------------------
Scrapeando item 80
1673101647
-------------------------------------------------------------------
Scrapeando item 81
1673101574
-------------------------------------------------------------------
Scrapeando item 82
1673101556
-------------------------------------------------------------------
Scrapeando item 83
1673101534
-------------------------------------------------------------------
Scrapeando item 84
1673101518
---------

-------------------------------------------------------------------
Scrapeando item 148
1673099208
-------------------------------------------------------------------
Scrapeando item 149
1673099164
-------------------------------------------------------------------
Scrapeando item 150
1673099005
-------------------------------------------------------------------
Scrapeando item 151
1673099075
-------------------------------------------------------------------
Scrapeando item 152
1673098751
-------------------------------------------------------------------
Scrapeando item 153
1673098744
-------------------------------------------------------------------
Scrapeando item 154
1673098699
-------------------------------------------------------------------
Scrapeando item 155
1673098692
-------------------------------------------------------------------
Scrapeando item 156
1673098649
-------------------------------------------------------------------
Scrapeando item 157
1673098540
----------

1673088424
-------------------------------------------------------------------
Scrapeando item 231
1673086843
-------------------------------------------------------------------
Scrapeando item 232
1673086628
-------------------------------------------------------------------
Scrapeando item 233
1673086275
-------------------------------------------------------------------
Scrapeando item 234
1673085744
-------------------------------------------------------------------
Scrapeando item 235
1673085472
-------------------------------------------------------------------
Scrapeando item 236
1673084507
-------------------------------------------------------------------
Scrapeando item 237
1673075873
-------------------------------------------------------------------
Scrapeando item 238
1673075130
-------------------------------------------------------------------
Scrapeando item 239
1673074095
-------------------------------------------------------------------
Scrapeando item 240
1673073780

1673061371
Jean strech talla 32 mujer
-------------------------------------------------------------------
Scrapeando item 303
1673061369
Vestido Mossimo
-------------------------------------------------------------------
Scrapeando item 304
1673061356
Polos Oversize De Hombre 
Precio :38 
 Nuevo Ocasion
Polo con rayas :  L
Polo verde : L
Camisa : M
-------------------------------------------------------------------
Scrapeando item 305
1673061208
Vestido verdecito
-------------------------------------------------------------------
Scrapeando item 306
1673060740
Falda shor
-------------------------------------------------------------------
Scrapeando item 307
1673060588
Vestido casual
-------------------------------------------------------------------
Scrapeando item 308
1673060584
Wetsuit Mohana￼ ￼talla M
-------------------------------------------------------------------
Scrapeando item 309
1673060521
Zapatillas Adidas
-------------------------------------------------------------------

1673051121
Jogger
-------------------------------------------------------------------
Scrapeando item 368
1673051121
Top
-------------------------------------------------------------------
Scrapeando item 369
1673050302
Adidas Superstar
-------------------------------------------------------------------
Scrapeando item 370
1673050250
Zapatillas Nike ORIGINALES
-------------------------------------------------------------------
Scrapeando item 371
1673050148
Jordan 3 pine green originales con caja
-------------------------------------------------------------------
Scrapeando item 372
1673049820
SUPLEX
-------------------------------------------------------------------
Scrapeando item 373
1673049798
Bellas Sandalias Chunkis
-------------------------------------------------------------------
Scrapeando item 374
1673049783
Zapatilla Adidas
-------------------------------------------------------------------
Scrapeando item 375
1673049778
Zapatillas de mujer Adidas 8 y medio
----------------

1673040031
CASACA DE MEZCLILLA PARA HOMBRE 
MARCA LEVI'S  SOLO TALLA M
-------------------------------------------------------------------
Scrapeando item 425
1673039982
Polos Oversize
-------------------------------------------------------------------
Scrapeando item 426
1673039896
IZIPAY
-------------------------------------------------------------------
Scrapeando item 427
1673039838
Camisa original
-------------------------------------------------------------------
Scrapeando item 428
1673039644
Vestido con brillos
-------------------------------------------------------------------
Scrapeando item 429
1673039388
Falda y top 
-------------------------------------------------------------------
Scrapeando item 430
1673039339
hermosos enterizos cebras
-------------------------------------------------------------------
Scrapeando item 431
1673039192
Liquidación de sandalias
-------------------------------------------------------------------
Scrapeando item 432
1673039118
Vestidos elegan

1673028333
Zapatilla New Atlethic
-------------------------------------------------------------------
Scrapeando item 489
1673027928
REMATO CASACA MARQUIS PARA DAMA
-------------------------------------------------------------------
Scrapeando item 490
1673027838
Polos Chicago bulls
-------------------------------------------------------------------
Scrapeando item 491
1673027815
Jeans 
-------------------------------------------------------------------
Scrapeando item 492
1673027788
DASHA
-------------------------------------------------------------------
Scrapeando item 493
1673027689
Enterizo Denimlab a 30 soles 💓💕
-------------------------------------------------------------------
Scrapeando item 494
1673027546
Modelo exclusivo de Conjunto palazo y blusa seda francesa
-------------------------------------------------------------------
Scrapeando item 495
1673027407
Remató pantalón hombre GUESS traído de USA, talla 32 , muy buen estado. Tal cual fotos . S/75
------------------------

1673017554
Falda negra talla m
-------------------------------------------------------------------
Scrapeando item 552
1673017286
Nautica hombre polo
-------------------------------------------------------------------
Scrapeando item 553
1673016789
Dickies camisa hombre
-------------------------------------------------------------------
Scrapeando item 554
1673016785
Closet sale remate
-------------------------------------------------------------------
Scrapeando item 555
1673016557
Closet sale remate
-------------------------------------------------------------------
Scrapeando item 556
1673016211
Vestido Gasa Manga Larga
-------------------------------------------------------------------
Scrapeando item 557
1673015927
Remato Force Talla 41 (detalle)
-------------------------------------------------------------------
Scrapeando item 558
1673015911
Remató bividi Nike
-------------------------------------------------------------------
Scrapeando item 559
1673015698
TOPS SATINADOS
------

AttributeError: 'str' object has no attribute 'exists'