In [None]:
import os, time, pathlib, pprint, requests, json
from datetime import datetime, timedelta

# Base URL of the job API; the BROWSER_ENDPOINT environment variable overrides
# the local default.
EP = os.environ.get("BROWSER_ENDPOINT", "http://localhost:8004")
# Root of the scraped-data storage tree.
SCRAPED = pathlib.Path("/storage", "scraped_data")

def wait_for(job_id, every=3, max_wait=None, request_timeout=30):
    """Poll ``{EP}/jobs/{job_id}`` until the job reaches a terminal status.

    Args:
        job_id: Identifier of the job to poll.
        every: Seconds to sleep between polls.
        max_wait: Optional overall deadline in seconds. ``None`` (the default)
            keeps the original behaviour of polling indefinitely.
        request_timeout: Per-request timeout in seconds, so a stalled
            connection cannot hang the notebook forever.

    Returns:
        The decoded JSON job record once its ``status`` is ``"finished"`` or
        ``"error"``.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
        TimeoutError: If ``max_wait`` is set and elapses before the job ends.
    """
    print(f"⏳ Waiting for job {job_id}...")
    deadline = None if max_wait is None else time.monotonic() + max_wait
    while True:
        resp = requests.get(f"{EP}/jobs/{job_id}", timeout=request_timeout)
        # Fail loudly on HTTP errors (same policy as submit) instead of
        # tripping over a missing "status" key in an error body.
        resp.raise_for_status()
        rec = resp.json()
        status = rec["status"]
        if status in {"finished", "error"}:
            # Do not show a success check mark for a job that ended in error.
            icon = "✅" if status == "finished" else "❌"
            print(f"\n{icon} {status.upper()}")
            return rec
        # "\r" rewrites the same console line; fall back to the bare status
        # when the record carries no elapsed-time string.
        print(f"\r⏱️  {rec.get('status_with_elapsed', status)}", end="")
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f"Job {job_id} still '{status}' after {max_wait} seconds"
            )
        time.sleep(every)

def submit(task, payload):
    """Create a job for ``task`` and block until it completes.

    The payload is echoed as indented JSON, POSTed to ``{EP}/jobs/{task}``,
    and the ``job_id`` from the response is handed to ``wait_for``.

    Args:
        task: Task name appended to the ``/jobs/`` endpoint path.
        payload: JSON-serialisable request body.

    Returns:
        Whatever ``wait_for`` returns for the created job.

    Raises:
        requests.HTTPError: If the POST answers with a 4xx/5xx status.
    """
    print(f"🚀 Submitting {task} task...")
    pretty_payload = json.dumps(payload, indent=2)
    print(f"📝 Payload: {pretty_payload}")

    response = requests.post(f"{EP}/jobs/{task}", json=payload)
    response.raise_for_status()

    job_id = response.json()["job_id"]
    print(f"🆔 Job ID: {job_id}")
    return wait_for(job_id)

def _average_of(hotels, key):
    """Return the mean of the usable numeric ``key`` values, or None if there are none."""
    values = [
        h[key]
        for h in hotels
        if isinstance(h.get(key), (int, float)) and h.get(key)
    ]
    return sum(values) / len(values) if values else None


def analyze_results(result, test_name):
    """Print a human-readable summary of a finished job record.

    Args:
        result: Job record dict. ``result["status"]`` is read first; on
            success the ``"result"`` entry is expected to hold ``"hotels"``
            (a list of dicts) and ``"search_metadata"`` (a dict).
        test_name: Label printed in the report header.

    Returns:
        None. All output goes to stdout.
    """
    print(f"\n" + "="*60)
    print(f"📊 ANALYSIS: {test_name}")
    print("="*60)

    if result["status"] == "error":
        print(f"❌ FAILED: {result.get('error', 'Unknown error')}")
        return

    if "result" not in result:
        print(f"❌ Task failed or returned unexpected result")
        pprint.pp(result)
        return

    # Tolerate an explicit null "result" by treating it as an empty one.
    res = result["result"] or {}
    metadata = res.get("search_metadata", {})
    hotels = res.get("hotels", [])

    print(f"✅ SUCCESS: Found {len(hotels)} hotels")
    print(f"📍 Location: {metadata.get('location')}")
    print(f"📅 Check-in: {metadata.get('check_in')}")
    print(f"📅 Check-out: {metadata.get('check_out')}")
    print(f"🏨 Nights: {metadata.get('nights')}")
    print(f"🔧 Method: {metadata.get('extraction_method')}")

    if hotels:
        # Average only over hotels that actually carry a value: dividing by
        # the count of priced/rated hotels used to raise ZeroDivisionError
        # when none had one, and a None value broke sum().
        avg_price = _average_of(hotels, 'price_per_night')
        avg_rating = _average_of(hotels, 'rating')

        if avg_price is not None and avg_price > 0:
            print(f"💰 Avg Price: ${avg_price:.0f}/night")
        if avg_rating is not None and avg_rating > 0:
            print(f"⭐ Avg Rating: {avg_rating:.1f}/10")

    print(f"📊 Deep Scraping: {'Yes' if metadata.get('deep_scrape_enabled') else 'No'}")
    print(f"⏱️  Completed at: {metadata.get('search_completed_at')}")

# Derive the check-in / check-out dates for the three test windows as
# ISO "YYYY-MM-DD" strings, each a fixed number of days after today.
today = datetime.now()

# Test 1: days +30 .. +33 (3 nights)
check_in_1, check_out_1 = (
    (today + timedelta(days=offset)).strftime("%Y-%m-%d") for offset in (30, 33)
)
# Test 2: days +45 .. +47 (2 nights)
check_in_2, check_out_2 = (
    (today + timedelta(days=offset)).strftime("%Y-%m-%d") for offset in (45, 47)
)
# Test 3: days +60 .. +64 (4 nights)
check_in_3, check_out_3 = (
    (today + timedelta(days=offset)).strftime("%Y-%m-%d") for offset in (60, 64)
)

print("📅 Test dates calculated:")
print(
    f"   Test 1: {check_in_1} to {check_out_1}",
    f"   Test 2: {check_in_2} to {check_out_2}",
    f"   Test 3: {check_in_3} to {check_out_3}",
    sep="\n",
)

In [3]:
# Test Case 1: Basic search (Riyadh hotels)
# Minimal payload: location, the first test date window, 2 adults, 10 results.
basic_search = dict(
    location="Riyadh, Saudi Arabia",
    check_in=check_in_1,
    check_out=check_out_1,
    adults=2,
    max_results=10,
)

# Submit the job, block until it reaches a terminal status, then print the report.
result_1 = submit("booking-hotels", basic_search)
analyze_results(result_1, "Basic Hotel Search - Riyadh")

🚀 Submitting booking-hotels task...
📝 Payload: {
  "location": "Riyadh, Saudi Arabia",
  "check_in": "2025-09-19",
  "check_out": "2025-09-22",
  "adults": 2,
  "max_results": 10
}
🆔 Job ID: 675a78e03ee047bca893b61b5287d0a2
⏳ Waiting for job 675a78e03ee047bca893b61b5287d0a2...
⏱️  running 1m 48s
✅ FINISHED

📊 ANALYSIS: Basic Hotel Search - Riyadh
✅ SUCCESS: Found 10 hotels
📍 Location: Riyadh, Saudi Arabia
📅 Dates: 2025-09-19 to 2025-09-22
🏨 Nights: 3
💰 Avg Price: $939.0/night
⭐ Avg Rating: 9.3/10
⏱️  Execution Time: 109.1s
📁 Data File: hotels_data.json


In [3]:
# # Test Case 2: Budget hotels with filters
# budget_search = {
#     "location": "Dubai, UAE",
#     "check_in": check_in_2,
#     "check_out": check_out_2,
#     "adults": 1,
#     "max_price": 300,
#     "min_rating": 7.0,
#     "max_results": 15
# }

# result_2 = submit("booking-hotels", budget_search)
# analyze_results(result_2, "Budget Hotels - Dubai")

In [4]:
# # Test Case 3: Luxury family trip
# luxury_search = {
#     "location": "Jeddah, Saudi Arabia",
#     "check_in": check_in_3,
#     "check_out": check_out_3,
#     "adults": 2,
#     "children": 2,
#     "rooms": 2,
#     "min_price": 500,
#     "star_rating": [4, 5],
#     "amenities": ["pool", "wifi", "gym"],
#     "max_results": 20
# }

# result_3 = submit("booking-hotels", luxury_search)
# analyze_results(result_3, "Luxury Family Trip - Jeddah")

In [5]:
# # Test Case 4: Business trip (no reviews needed)
# business_search = {
#     "location": "KAUST, Thuwal, Saudi Arabia",
#     "check_in": check_in_1,
#     "check_out": check_out_1,
#     "adults": 1,
#     "amenities": ["wifi"],
#     "include_reviews": False,
#     "max_results": 5
# }

# result_4 = submit("booking-hotels", business_search)
# analyze_results(result_4, "Business Trip - KAUST Area")

In [6]:
# # Test Case 5: Conference attendee scenario (proximity search)
# conference_search = {
#     "location": "Riyadh International Convention Center, Riyadh",
#     "search_radius": "5km",
#     "check_in": check_in_2,
#     "check_out": check_out_2,
#     "adults": 1,
#     "max_results": 15
# }

# result_5 = submit("booking-hotels", conference_search)
# analyze_results(result_5, "Conference Proximity Search")

In [7]:
# # Test Summary
# print("📋 BOOKING HOTELS TASK - TEST SUMMARY")
# print("="*60)

# test_results = [
#     ("Basic Search - Riyadh", result_1),
#     ("Budget Hotels - Dubai", result_2),
#     ("Luxury Family - Jeddah", result_3),
#     ("Business Trip - KAUST", result_4),
#     ("Conference Proximity", result_5)
# ]

# successful_tests = 0
# failed_tests = 0
# total_hotels = 0
# total_execution_time = 0

# for test_name, result in test_results:
#     if result["status"] == "finished" and result.get("result", {}).get("success"):
#         successful_tests += 1
#         hotels_found = result["result"].get("hotels_found", 0)
#         exec_time = result["result"].get("execution_time_seconds", 0)
#         total_hotels += hotels_found
#         total_execution_time += exec_time
#         print(f"✅ {test_name}: {hotels_found} hotels ({exec_time}s)")
#     else:
#         failed_tests += 1
#         error_msg = result.get("error", "Unknown error")
#         print(f"❌ {test_name}: FAILED - {error_msg}")

# print("\n📊 OVERALL STATISTICS:")
# print(f"   ✅ Successful tests: {successful_tests}/{len(test_results)}")
# print(f"   ❌ Failed tests: {failed_tests}/{len(test_results)}")
# print(f"   🏨 Total hotels collected: {total_hotels}")
# print(f"   ⏱️  Total execution time: {total_execution_time:.1f}s")
# print(f"   📁 Data files saved to: {SCRAPED}/booking-hotels/")

# if successful_tests == len(test_results):
#     print("\n🎉 ALL TESTS PASSED! The booking-hotels task is working perfectly.")
# elif successful_tests >= len(test_results) * 0.75:
#     print("\n✅ Most tests passed. The booking-hotels task is working well with minor issues.")
# else:
#     print("\n⚠️  Multiple tests failed. The booking-hotels task needs debugging.")

# print("\n🔍 To examine detailed results, check the JSON files in the data directories.")

In [8]:
# Test Case 6: IMPROVED DATA EXTRACTION - High Rating Filter Test
print("🧪 Testing IMPROVED data extraction with strict min_rating filter...")

improved_test = {
    "location": "Dubai, UAE",
    "check_in": check_in_2,
    "check_out": check_out_2,
    "adults": 1,
    "min_rating": 8.0,  # High rating requirement to test strict filtering
    "max_results": 5    # Small number for quick validation
}

result_improved = submit("booking-hotels", improved_test)
analyze_results(result_improved, "IMPROVED Data Extraction - High Rating Filter")

# Let's also check the actual hotel data to validate filter enforcement.
# The threshold is read from the payload so the check cannot drift from the request.
min_rating = improved_test["min_rating"]
# A finished job may still lack a "result" entry; treat that as empty rather than raising KeyError.
improved_payload = result_improved.get("result") or {}

if result_improved["status"] == "finished" and improved_payload.get("success"):
    try:
        # Build the path from the shared SCRAPED root instead of repeating the literal.
        # NOTE(review): the recorded run below failed with "No such file or directory"
        # at this location — confirm where the service actually writes `data_file`.
        data_file = SCRAPED / "booking-hotels" / improved_payload["data_file"]
        # JSON is UTF-8 by specification; don't rely on the platform default encoding.
        with open(data_file, "r", encoding="utf-8") as f:
            hotels_data = json.load(f)

        print(f"\n🔍 DETAILED VALIDATION:")
        print(f"📊 Total hotels in file: {len(hotels_data)}")

        for i, hotel in enumerate(hotels_data[:3], 1):  # Check first 3 hotels
            print(f"\n🏨 Hotel #{i}: {hotel.get('name', 'Unknown')}")
            print(f"   💰 Price: ${hotel.get('price_per_night', 'N/A')}/night")
            print(f"   ⭐ Rating: {hotel.get('rating', 'N/A')}/10")
            print(f"   📝 Reviews: {hotel.get('review_count', 'N/A')} reviews")
            print(f"   📍 Address: {hotel.get('address', 'N/A')}")
            print(f"   📏 Distance: {hotel.get('distance_to_center', 'N/A')}")
            print(f"   🔗 URL: {hotel.get('booking_url', 'N/A')}")

            # Validate filter compliance
            rating = hotel.get('rating')
            if not rating:
                print(f"   ⚠️  NO RATING: Should be filtered out!")
            elif rating >= min_rating:
                print(f"   ✅ FILTER OK: Rating {rating} >= {min_rating}")
            else:
                print(f"   ❌ FILTER VIOLATION: Rating {rating} < {min_rating}")

    except Exception as e:
        # Deliberately broad: this is a best-effort inspection step and must not
        # abort the notebook run.
        print(f"❌ Could not read hotel data file: {e}")

print("\n" + "="*80)

🧪 Testing IMPROVED data extraction with strict min_rating filter...
🚀 Submitting booking-hotels task...
📝 Payload: {
  "location": "Dubai, UAE",
  "check_in": "2025-10-04",
  "check_out": "2025-10-06",
  "adults": 1,
  "min_rating": 8.0,
  "max_results": 5
}
🆔 Job ID: f5ef0ffce922468bb43acf34cf30a2bc
⏳ Waiting for job f5ef0ffce922468bb43acf34cf30a2bc...
⏱️  running 1m 51s
✅ FINISHED

📊 ANALYSIS: IMPROVED Data Extraction - High Rating Filter
✅ SUCCESS: Found 5 hotels
📍 Location: Dubai, UAE
📅 Dates: 2025-10-04 to 2025-10-06
🏨 Nights: 2
⏱️  Execution Time: 113.6s
📁 Data File: hotels_data.json
❌ Could not read hotel data file: [Errno 2] No such file or directory: '/storage/scraped_data/booking-hotels/hotels_data.json'

