In [1]:
import os

# Define base directory for the project.
# The HOTEL_SCRAPER_BASE_DIR environment variable, when set to a non-empty
# value, overrides the default so the notebook can run on machines that have
# no D: drive; without it the original location is used unchanged.
BASE_DIR = os.environ.get("HOTEL_SCRAPER_BASE_DIR") or r"D:\Hotel Pricing Scraper"

# Create the directory if it doesn't exist (exist_ok keeps re-runs harmless)
os.makedirs(BASE_DIR, exist_ok=True)

print(f"Base directory set to: {BASE_DIR}")


Base directory set to: D:\Hotel Pricing Scraper


In [4]:
def save_raw_html(html, hotel_name, check_in_date, nights):
    """Write raw page HTML to a file inside BASE_DIR and return its path.

    The file is named ``<hotel>_raw_<check_in_date>_nights<nights>.html``,
    where ``<hotel>`` is the lower-cased hotel name with spaces and hyphens
    turned into underscores.

    Args:
        html: Text to write (saved as UTF-8).
        hotel_name: Human-readable hotel name used to build the file name.
        check_in_date: Check-in date, interpolated into the file name as-is.
        nights: Length of stay, interpolated into the file name as-is.

    Returns:
        The full path of the file that was written.
    """
    safe_name = hotel_name.lower().replace(" ", "_").replace("-", "_")
    filename = f"{safe_name}_raw_{check_in_date}_nights{nights}.html"

    # Path separators and the other characters Windows rejects in file names
    # would otherwise redirect the write into a (missing) sub-folder or make
    # open() fail, e.g. a date written as "07/10/2025" or a name with ":".
    forbidden = str.maketrans({ch: "_" for ch in '<>:"/\\|?*'})
    filename = filename.translate(forbidden)

    file_path = os.path.join(BASE_DIR, filename)

    with open(file_path, "w", encoding="utf-8") as f:
        f.write(html)

    print(f"Saved HTML to: {file_path}")
    return file_path


In [3]:
import requests
from datetime import datetime, timedelta

# === CONFIGURATION ===
# Prefer supplying the token through the CRAWLBASE_JS_TOKEN environment
# variable so it does not have to live in the notebook.
# NOTE(review): the literal fallback below is a credential stored in source;
# it is kept only so existing runs behave the same. Rotate it and drop the
# fallback once the environment variable is set everywhere this runs.
CRAWLBASE_JS_TOKEN = os.environ.get("CRAWLBASE_JS_TOKEN") or "8i_FkaiD6T7sgW6bijcqSw"
BASE_API_URL = "https://api.crawlbase.com/"

# === FUNCTION TO BUILD DYNAMIC URL (optional, if hotel allows date injection in URL) ===
def build_dynamic_url(base_url, check_in_date, nights=1):
    """Return the URL to request for the given stay.

    Currently a pass-through: ``check_in_date`` and ``nights`` are accepted
    but not used, and ``base_url`` comes back unchanged. The parameters are
    here as an extension point for sites whose URLs can carry the stay
    dates (for example as a query string); many hotel sites cannot.
    """
    dynamic_url = base_url
    return dynamic_url

# === MAIN SCRAPE FUNCTION ===
def scrape_hotel_page(hotel_url, check_in_date=None, nights=1,
                      hotel_name="Pendry Manhattan West", timeout=90):
    """Fetch a hotel page through the Crawlbase API and save the raw HTML.

    Args:
        hotel_url: Address of the hotel page to fetch.
        check_in_date: Check-in date string, used when building the URL and
            in the saved file's name. Falls back to today's date formatted
            as YYYY-MM-DD when omitted or empty.
        nights: Length of stay, forwarded to the URL builder and file name.
        hotel_name: Label used for the saved file's name. The default keeps
            the name that used to be hard-coded here.
        timeout: Seconds handed to ``requests.get`` so a stalled request
            cannot block forever. NOTE(review): 90 is a guess sized for
            JavaScript-rendered pages; confirm against the Crawlbase docs.

    Returns:
        The response body as text when the API answers with HTTP 200,
        otherwise ``None`` (the status code and body are printed).

    Raises:
        requests.exceptions.RequestException: connection failures and
            timeouts are not caught here and propagate to the caller.
    """
    if not check_in_date:
        check_in_date = datetime.today().strftime('%Y-%m-%d')

    final_url = build_dynamic_url(hotel_url, check_in_date, nights)

    params = {
        "token": CRAWLBASE_JS_TOKEN,
        "url": final_url,
        "render": "true"
    }

    print(f"Scraping: {final_url}")
    response = requests.get(BASE_API_URL, params=params, timeout=timeout)

    if response.status_code != 200:
        print(f"❌ Error: {response.status_code}")
        print(response.text)
        return None

    print("✅ Successfully fetched page.")
    html = response.text
    # Save under this call's own check-in date. The previous code read a
    # module-level `check_in` variable instead, which only exists after the
    # test cell has run and ignores the date actually requested.
    save_raw_html(html, hotel_name=hotel_name, check_in_date=check_in_date, nights=nights)
    return html

# === TEST CALL ===
if __name__ == "__main__":
    # Sample inputs for a single run against one hotel page. These are
    # assigned at module level, so their names are kept as they are.
    test_url = "https://www.pendry.com/manhattan-west/"
    check_in = "2025-07-10"
    nights = 1

    scrape_hotel_page(test_url, check_in_date=check_in, nights=nights)


Scraping: https://www.pendry.com/manhattan-west/
✅ Successfully fetched page.
Saved HTML to: D:\Hotel Pricing Scraper\pendry_manhattan_west_raw_2025-07-10_nights1.html


In [6]:
import os
import shutil

# Ensure target directory exists. BASE_DIR is the project folder defined in
# the first cell; reusing it keeps the notebook copy next to the saved HTML
# instead of repeating the path literal here.
os.makedirs(BASE_DIR, exist_ok=True)

# Define source and target paths. The source is resolved against the
# kernel's current working directory.
notebook_name = "Crawlbasehotelscraper.ipynb"
source_path = os.path.abspath(notebook_name)
target_path = os.path.join(BASE_DIR, notebook_name)

# Copy the file to the new location. When the notebook is already being run
# from the target folder, source and target are the same file and copy2
# raises SameFileError; there is nothing to copy in that case.
try:
    shutil.copy2(source_path, target_path)
except shutil.SameFileError:
    print(f"Notebook is already at: {target_path}")
else:
    print(f"✅ Notebook copied to: {target_path}")


✅ Notebook copied to: D:\Hotel Pricing Scraper\Crawlbasehotelscraper.ipynb
