In [1]:
import time
import math
import copy
import pandas as pd
from datetime import datetime, timedelta
from models import CalendarDayState, AirBnbRoomCalendarDay
import logging

from my_webdriver import driver_setup

MAX_WAIT_FOR_TRANSLATION_ON_POPUP_SEC = 5
MAX_WAIT_FOR_COOKIES_POPUP_SEC = 5
NUMBER_ON_MONTHS_IN_FUTURE_TO_CHECK = 6
MONTHS_PRESENT_IN_ONE_ELEMENT = 4
TIME_SLEEP_AFTER_CAL_NEXT_CLICK_SEC = 0.2
NUMBER_CAL_FETCHES_NEEDED = math.ceil(
    NUMBER_ON_MONTHS_IN_FUTURE_TO_CHECK / MONTHS_PRESENT_IN_ONE_ELEMENT
)

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(name)s - %(message)s', 
    datefmt='%Y-%m-%d %H:%M:%S',  # Customize the time format (optional)
    force=True,
)
logger = logging.getLogger(__name__)


from  selenium_airbnb_calendar_scraper import close_translation_popup_if_exists,close_cookie_banner_if_exists,parse_date,get_calendar_table_from_driver,is_day_disabled,clear_dates,get_two_visible_tables,parse_from_day_button_aria_label_to_state, first_day_of_month,get_all_cells_from_table, parse_pricing_from_pricing_form,  from_pricing_elements_to_pricing_dict, calculate_mean, get_two_visible_tables_with_retry, next_month, get_state_and_num_min_nights_of_given_date, get_smallest_stay_interval_and_pricing_dict, enrich_calendar_days_details_if_data_is_available, generate_airbnb_calendar_day_list


In [3]:
driver = driver_setup(settings={"headless": False})
ROOM_ID = 14132224
ROOM_URL_FOR_TEST=  f"https://www.airbnb.com/rooms/{ROOM_ID}?adults=2" # "https://www.airbnb.com/rooms/634438216271572667?adults=2"
NUMBER_ON_MONTHS_IN_FUTURE_TO_CHECK = 6

logger.info("[%s] getting room", ROOM_ID)
driver.get(ROOM_URL_FOR_TEST)
logger.info("[%s] room gotten", ROOM_ID)
close_translation_popup_if_exists(driver, ROOM_ID)
logger.info("[%s] close_translation_popup_if_exists over", ROOM_ID)
close_cookie_banner_if_exists(driver, ROOM_ID)
logger.info("[%s] close_cookie_banner_if_exists over", ROOM_ID)


calendar_days_details_empty_template = {'current_date_state':None,'minimum_stay_nights':None,'latest_prices_array':[],'cleaning_fee':None,'currency':None,'extra_attributes':{},'price':None}
calendar_days_details = {}
old_visible_table_one_string = None
for num_nexts_to_click in range(NUMBER_ON_MONTHS_IN_FUTURE_TO_CHECK):
    old_visible_table_one_string, first_visible_table, second_visible_table = get_two_visible_tables_with_retry(driver, old_visible_table_one_string,ROOM_ID, sleep_after_retry_sec=1, max_retries=3)
    logger.info("[%s] old_visible_table_one_string: %s, first_visible_table: %s, second_visible_table: %s", ROOM_ID,old_visible_table_one_string, first_visible_table,second_visible_table)
    second_visible_table_cells = None
    first_visible_table_cells = get_all_cells_from_table(first_visible_table)
    for first_table_cell_index,first_table_cell in enumerate(first_visible_table_cells):
        date_button_aria_label = first_table_cell.get_attribute("aria-label")
        date_button_date = parse_date(date_button_aria_label.split(".", 1)[0])
        # time.sleep(10000000)
        current_date_state, num_nights = get_state_and_num_min_nights_of_given_date(date_button_aria_label,first_table_cell,first_visible_table,driver, ROOM_ID)
        logger.info("[%s] date_button_date: %s. current_date_state: %s. num_nights: %s.",ROOM_ID,date_button_date, current_date_state, num_nights)
        pricing_dict, second_visible_table_cells = get_smallest_stay_interval_and_pricing_dict(current_date_state,num_nights, date_button_date, second_visible_table, second_visible_table_cells, first_visible_table_cells, first_table_cell, first_table_cell_index, driver, ROOM_ID)
        calendar_days_details.setdefault(date_button_date, copy.deepcopy(calendar_days_details_empty_template))
        calendar_days_details[date_button_date]['current_date_state'] = current_date_state
        calendar_days_details[date_button_date]['minimum_stay_nights'] = num_nights
        calendar_days_details, pricing_dict = enrich_calendar_days_details_if_data_is_available(calendar_days_details_empty_template, current_date_state, pricing_dict, calendar_days_details, date_button_date, num_nights)

    if (num_nexts_to_click+1) < NUMBER_ON_MONTHS_IN_FUTURE_TO_CHECK:
        next_month(driver)
    time.sleep(1)

2024-11-14 22:05:19 - INFO - __main__ - [14132224] getting room
2024-11-14 22:05:21 - INFO - __main__ - [14132224] room gotten
2024-11-14 22:05:24 - INFO - __main__ - [14132224] close_translation_popup_if_exists over
2024-11-14 22:05:29 - INFO - selenium_airbnb_calendar_scraper - [14132224] No 'cookies banner' form was found
2024-11-14 22:05:29 - INFO - __main__ - [14132224] close_cookie_banner_if_exists over
2024-11-14 22:05:29 - INFO - selenium_airbnb_calendar_scraper - [14132224] table is not visible. Will will go to next one
2024-11-14 22:05:29 - INFO - selenium_airbnb_calendar_scraper - [14132224] visible table number 1 is for month November 2024
2024-11-14 22:05:29 - INFO - selenium_airbnb_calendar_scraper - [14132224] visible table number 2 is for month December 2024
2024-11-14 22:05:29 - INFO - selenium_airbnb_calendar_scraper - [14132224] table is not visible. Will will go to next one
2024-11-14 22:05:29 - INFO - __main__ - [14132224] old_visible_table_one_string: November 202

In [4]:
calendar_days_details_models = generate_airbnb_calendar_day_list(calendar_days_details,ROOM_ID)

In [8]:
from models import Base, db_url, save_or_update_airbnb_date
import sqlalchemy
from sqlalchemy.orm import Session
engine = sqlalchemy.create_engine(
    db_url, echo=False
)  # We have also specified a parameter create_engine.echo, which will instruct the Engine to log all of the SQL it emits to a Python logger that will write to standard out.
Base.metadata.create_all(engine)

session = Session(engine)
for calendar_days_details in calendar_days_details_models:
    save_or_update_airbnb_date(session=session, new_instance=i)
    
session.commit()

In [9]:
calendar_days_details

<models.AirBnbRoomCalendarDay at 0x10647e7d0>

In [6]:
len(calendar_days_details_models)

182